Diffstat (limited to 'examples')
57 files changed, 8247 insertions, 7193 deletions
diff --git a/examples/BUILD.bazel b/examples/BUILD.bazel new file mode 100644 index 0000000000..ad9abf50a2 --- /dev/null +++ b/examples/BUILD.bazel @@ -0,0 +1,540 @@ +# Copyright (c) 2023 Arm Limited. +# +# SPDX-License-Identifier: MIT +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +#--------------------------------------------------------------------- +# Neon examples + +cc_binary( + name = "neon_cnn", + srcs = ["neon_cnn.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "neon_copy_objects", + srcs = ["neon_copy_objects.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "neon_gemm_qasymm8", + srcs = ["neon_gemm_qasymm8.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "neon_permute", + srcs = ["neon_permute.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "neon_scale", + srcs = ["neon_scale.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "neon_sgemm", + srcs = ["neon_sgemm.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + 
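Each cc_binary above repeats the same copts pattern: a select() that maps the repository's //:arch_armv8-a and //:arch_armv8.2-a+fp16 config settings to the matching -march flag, with //conditions:default falling back to -march=armv8-a. A minimal sketch of that mapping in plain Python, for illustration only (the config_setting targets themselves are assumed to live in the repository's root BUILD.bazel, which is not part of this diff):

    # Sketch only: the arch -> copts mapping that each select() above encodes.
    ARCH_TO_COPTS = {
        "armv8-a": ["-march=armv8-a"],
        "armv8.2-a+fp16": ["-march=armv8.2-a+fp16"],
    }

    def example_copts(arch: str) -> list:
        # Unknown settings fall back to armv8-a, mirroring //conditions:default.
        return ARCH_TO_COPTS.get(arch, ["-march=armv8-a"])

In Starlark this repetition could be factored into a small macro wrapping cc_binary; the file instead keeps each target explicit.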
+#--------------------------------------------------------------------- +# Graph examples + +cc_binary( + name = "graph_alexnet", + srcs = ["graph_alexnet.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_deepspeech_v0_4_1", + srcs = ["graph_deepspeech_v0_4_1.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_edsr", + srcs = [ + "graph_edsr.cpp", + "graph_edsr.h", + ], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_googlenet", + srcs = ["graph_googlenet.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_inception_resnet_v1", + srcs = ["graph_inception_resnet_v1.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_inception_resnet_v2", + srcs = ["graph_inception_resnet_v2.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_inception_v3", + srcs = ["graph_inception_v3.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_inception_v4", + srcs = ["graph_inception_v4.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_lenet", + srcs = ["graph_lenet.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_mobilenet", + srcs = ["graph_mobilenet.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": 
["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_mobilenet_v2", + srcs = ["graph_mobilenet_v2.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_resnet12", + srcs = ["graph_resnet12.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_resnet50", + srcs = ["graph_resnet50.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_resnet_v2_50", + srcs = ["graph_resnet_v2_50.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_resnext50", + srcs = ["graph_resnext50.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_shufflenet", + srcs = ["graph_shufflenet.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_squeezenet", + srcs = ["graph_squeezenet.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_squeezenet_v1_1", + srcs = ["graph_squeezenet_v1_1.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_srcnn955", + srcs = ["graph_srcnn955.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_ssd_mobilenet", + srcs = ["graph_ssd_mobilenet.cpp"], + copts = select({ + 
"//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_vgg16", + srcs = ["graph_vgg16.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_vgg19", + srcs = ["graph_vgg19.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_vgg_vdsr", + srcs = ["graph_vgg_vdsr.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_yolov3", + srcs = ["graph_yolov3.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt new file mode 100644 index 0000000000..6b7fbded5d --- /dev/null +++ b/examples/CMakeLists.txt @@ -0,0 +1,57 @@ +# Copyright (c) 2023-2024 Arm Limited. +# +# SPDX-License-Identifier: MIT +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +set(EXAMPLE_GRAPH_NAMES + graph_alexnet + graph_deepspeech_v0_4_1 + graph_edsr + graph_googlenet + graph_inception_resnet_v1 + graph_inception_resnet_v2 + graph_inception_v3 + graph_inception_v4 + graph_lenet + graph_mobilenet_v2 + graph_mobilenet + graph_resnet_v2_50 + graph_resnet12 + graph_resnet50 + graph_resnext50 + graph_shufflenet + graph_squeezenet_v1_1 + graph_squeezenet + graph_srcnn955 + graph_ssd_mobilenet + graph_vgg_vdsr + graph_vgg16 + graph_vgg19 + graph_yolov3 + PARENT_SCOPE) + +set(EXAMPLE_NEON_NAMES + neon_cnn neon_copy_objects + neon_gemm_qasymm8 + neon_gemm_s8_f32 + neon_permute + neon_scale + neon_sgemm + PARENT_SCOPE) diff --git a/examples/SConscript b/examples/SConscript index 8971d3c6d8..8ece7e60b2 100644 --- a/examples/SConscript +++ b/examples/SConscript @@ -1,4 +1,7 @@ -# Copyright (c) 2017 ARM Limited. +#!/usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright (c) 2017-2024 Arm Limited. # # SPDX-License-Identifier: MIT # @@ -32,22 +35,38 @@ examples_env.Append(CPPPATH = ["#"]) # Build examples utils = examples_env.Object("../utils/Utils.cpp") -if env['os'] in ['android', 'bare_metal'] or env['standalone']: +if env['os'] in ['android', 'macos', 'bare_metal'] or env['standalone']: Import('arm_compute_graph_a') Import('arm_compute_a') - Import('arm_compute_core_a') - arm_compute_libs = [ arm_compute_a, arm_compute_core_a ] + arm_compute_libs = [ arm_compute_a ] arm_compute_graph_libs = arm_compute_libs # The graph library needs to be linked separately with --whole-archive arm_compute_dependency = arm_compute_a graph_dependency = [arm_compute_graph_a] else: Import('arm_compute_graph_so') Import('arm_compute_so') - arm_compute_libs = ["arm_compute", "arm_compute_core"] + arm_compute_libs = ["arm_compute"] arm_compute_graph_libs = [ "arm_compute_graph" ] + arm_compute_libs arm_compute_dependency = arm_compute_so graph_dependency = [arm_compute_graph_so] +extra_link_flags = [] + +if not env['os'] in ['windows','bare_metal'] : + extra_link_flags += ['-fstack-protector-strong'] + + +if env['os'] != 'windows' : + load_whole_archive = '-Wl,--whole-archive' + noload_whole_archive = '-Wl,--no-whole-archive' +else: + load_whole_archive = '/wholearchive' + noload_whole_archive = '' + +if 'macos' in examples_env['os']: + load_whole_archive = '-Wl,-force_load' + noload_whole_archive = '' + # Build graph examples graph_utils = examples_env.Object("../utils/GraphUtils.cpp") graph_utils += examples_env.Object("../utils/CommonGraphOptions.cpp") @@ -55,9 +74,8 @@ examples_libs = examples_env.get("LIBS",[]) for file in Glob("./graph_*.cpp"): example = os.path.basename(os.path.splitext(str(file))[0]) prog = None - - if env['os'] in ['android', 'bare_metal'] or env['standalone']: - prog = examples_env.Program(example, ["{}.cpp".format(example), utils, graph_utils], LIBS = examples_libs + arm_compute_graph_libs, LINKFLAGS=examples_env["LINKFLAGS"]+['-Wl,--whole-archive',graph_dependency,'-Wl,--no-whole-archive', '-fstack-protector-strong']) + if env['os'] in ['android','windows', 'macos', 'bare_metal'] or env['standalone']: + prog = examples_env.Program(example, ["{}.cpp".format(example), utils, graph_utils], LIBS = examples_libs + arm_compute_graph_libs, LINKFLAGS=examples_env["LINKFLAGS"]+[load_whole_archive, graph_dependency, noload_whole_archive] + extra_link_flags) Depends(prog, graph_dependency) prog = install_bin(prog) else: @@ -69,18 +87,20 @@ for file in Glob("./graph_*.cpp"): Default(alias) if env['opencl'] and env['neon']: + examples_env.Append(CPPDEFINES = 
['ARM_COMPUTE_CL']) for file in Glob("./neoncl_*.cpp"): example = os.path.basename(os.path.splitext(str(file))[0]) - prog = examples_env.Program(example, ["{}.cpp".format(example), utils], CPPDEFINES=['ARM_COMPUTE_CL'], LIBS = examples_libs + arm_compute_libs) + prog = examples_env.Program(example, ["{}.cpp".format(example), utils], LIBS = examples_libs + arm_compute_libs) Depends(prog, arm_compute_dependency) prog = install_bin(prog) alias = examples_env.Alias(example, prog) Default(alias) if env['opencl']: + examples_env.Append(CPPDEFINES = ['ARM_COMPUTE_CL']) for file in Glob("./cl_*.cpp"): example = os.path.basename(os.path.splitext(str(file))[0]) - prog = examples_env.Program(example, ["{}.cpp".format(example), utils], CPPDEFINES=['ARM_COMPUTE_CL'], LIBS = examples_libs + arm_compute_libs) + prog = examples_env.Program(example, ["{}.cpp".format(example), utils], LIBS = examples_libs + arm_compute_libs) Depends(prog, arm_compute_dependency) prog = install_bin(prog) alias = examples_env.Alias(example, prog) @@ -91,9 +111,15 @@ if env['gemm_tuner'] and env['opencl']: for file in Glob("./gemm_tuner/cl_*.cpp"): example = os.path.basename(os.path.splitext(str(file))[0]) example = os.path.join("gemm_tuner", example) - prog = examples_env.Program(example, ["{}.cpp".format(example), utils, gemm_tuner_common_options], CPPDEFINES=['ARM_COMPUTE_CL'], LIBS = examples_libs + arm_compute_libs) - Depends(prog, arm_compute_dependency) - prog = install_bin(prog) + if env['os'] in ['android', 'macos', 'bare_metal'] or env['standalone']: + prog = examples_env.Program(example, ["{}.cpp".format(example), utils, gemm_tuner_common_options], LIBS = examples_libs + arm_compute_graph_libs, LINKFLAGS=examples_env["LINKFLAGS"]+[load_whole_archive, graph_dependency, noload_whole_archive, '-fstack-protector-strong'] ) + Depends(prog, graph_dependency) + prog = install_bin(prog) + else: + #-Wl,--allow-shlib-undefined: Ignore dependencies of dependencies + prog = examples_env.Program(example, ["{}.cpp".format(example), utils, gemm_tuner_common_options], LIBS = examples_libs + arm_compute_graph_libs, LINKFLAGS=examples_env["LINKFLAGS"]+['-Wl,--allow-shlib-undefined'] ) + Depends(prog, graph_dependency) + prog = install_bin(prog) alias = examples_env.Alias(example, prog) Default(alias) @@ -103,7 +129,7 @@ if env['neon']: prog = None if env['os'] in ['bare_metal']: - prog = examples_env.Program(example, ["{}.cpp".format(example), utils], LINKFLAGS=examples_env["LINKFLAGS"]+['-fstack-protector'], LIBS = examples_libs + arm_compute_libs) + prog = examples_env.Program(example, ["{}.cpp".format(example), utils], LINKFLAGS=examples_env["LINKFLAGS"], LIBS = examples_libs + arm_compute_libs) else: prog = examples_env.Program(example, ["{}.cpp".format(example), utils], LIBS = examples_libs + arm_compute_libs) @@ -112,28 +138,19 @@ if env['neon']: alias = examples_env.Alias(example, prog) Default(alias) -if env['gles_compute']: - for file in Glob("./gc_*.cpp"): +if env['external_tests_dir']: + for file in Glob(env['external_tests_dir'] + "/examples/graph_*.cpp"): example = os.path.basename(os.path.splitext(str(file))[0]) - prog = examples_env.Program(example, ["{}.cpp".format(example), utils], CPPDEFINES=['ARM_COMPUTE_GC'], LIBS = examples_libs + arm_compute_libs) - Depends(prog, arm_compute_dependency) - prog = install_bin(prog) + prog = None + + if env['os'] in ['android', 'macos', 'bare_metal'] or env['standalone']: + prog = examples_env.Program(example, [examples_env.Object(source=file, target=example), utils, graph_utils], 
LIBS = examples_libs + arm_compute_graph_libs, LINKFLAGS=examples_env["LINKFLAGS"]+[load_whole_archive, graph_dependency, noload_whole_archive] + extra_link_flags) + Depends(prog, graph_dependency) + prog = install_bin(prog) + else: + #-Wl,--allow-shlib-undefined: Ignore dependencies of dependencies + prog = examples_env.Program(example, [examples_env.Object(source=file, target=example), utils, graph_utils], LIBS = examples_libs + arm_compute_graph_libs, LINKFLAGS=examples_env["LINKFLAGS"]+['-Wl,--allow-shlib-undefined'] ) + Depends(prog, graph_dependency) + prog = install_bin(prog) alias = examples_env.Alias(example, prog) Default(alias) - -#FIXME Delete 3rdparty builds before release -for file in Glob("#3rdparty/examples/graph_*.cpp"): - example = os.path.basename(os.path.splitext(str(file))[0]) - prog = None - - if env['os'] in ['android', 'bare_metal'] or env['standalone']: - prog = examples_env.Program(example, [examples_env.Object(source=file, target=example), utils, graph_utils], LIBS = examples_libs + arm_compute_graph_libs, LINKFLAGS=examples_env["LINKFLAGS"]+['-Wl,--whole-archive',graph_dependency,'-Wl,--no-whole-archive', '-fstack-protector-strong']) - Depends(prog, graph_dependency) - prog = install_bin(prog) - else: - #-Wl,--allow-shlib-undefined: Ignore dependencies of dependencies - prog = examples_env.Program(example, [examples_env.Object(source=file, target=example), utils, graph_utils], LIBS = examples_libs + arm_compute_graph_libs, LINKFLAGS=examples_env["LINKFLAGS"]+['-Wl,--allow-shlib-undefined'] ) - Depends(prog, graph_dependency) - prog = install_bin(prog) - alias = examples_env.Alias(example, prog) - Default(alias) diff --git a/examples/cl_cache.cpp b/examples/cl_cache.cpp index a1a2d2548a..9da5b9176d 100644 --- a/examples/cl_cache.cpp +++ b/examples/cl_cache.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,12 +21,13 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/runtime/CL/CLFunctions.h" - +#include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/CL/functions/CLPermute.h" #include "arm_compute/runtime/CL/Utils.h" + #include "utils/Utils.h" using namespace arm_compute; @@ -43,14 +44,15 @@ public: bool do_setup(int argc, char **argv) override { - std::cout << "Once the program has run and created the file cache.bin, rerun with --restore_cache." << std::endl; + std::cout << "Once the program has run and created the file cache.bin, rerun with --restore_cache." + << std::endl; CLScheduler::get().default_init(); - if(argc > 1) + if (argc > 1) { std::string argv1 = argv[1]; std::transform(argv1.begin(), argv1.end(), argv1.begin(), ::tolower); - if(argv1 == "--restore_cache") + if (argv1 == "--restore_cache") { // Load the precompiled kernels from a file into the kernel library, in this way the next time they are needed // compilation won't be required. 
@@ -110,11 +112,13 @@ private: window.use_tensor_dimensions(reference.info()->tensor_shape()); Iterator it_ref(&reference, window); Iterator it_res(&result, window); - execute_window_loop(window, [&](const Coordinates &) - { - assert(*reinterpret_cast<unsigned char *>(it_ref.ptr()) == *reinterpret_cast<unsigned char *>(it_res.ptr())); - }, - it_ref, it_res); + execute_window_loop( + window, + [&](const Coordinates &) { + assert(*reinterpret_cast<unsigned char *>(it_ref.ptr()) == + *reinterpret_cast<unsigned char *>(it_res.ptr())); + }, + it_ref, it_res); reference.unmap(); result.unmap(); } @@ -126,11 +130,9 @@ private: window.use_tensor_dimensions(tensor.info()->tensor_shape()); Iterator it_tensor(&tensor, window); unsigned char val(0); - execute_window_loop(window, [&](const Coordinates &) - { - *reinterpret_cast<unsigned char *>(it_tensor.ptr()) = val++; - }, - it_tensor); + execute_window_loop( + window, [&](const Coordinates &) { *reinterpret_cast<unsigned char *>(it_tensor.ptr()) = val++; }, + it_tensor); tensor.unmap(); } void init_tensor(const TensorShape shape, CLTensor &tensor, DataType type, DataLayout layout) diff --git a/examples/cl_convolution.cpp b/examples/cl_convolution.cpp deleted file mode 100644 index f2d19ef0cc..0000000000 --- a/examples/cl_convolution.cpp +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CL /* Needed by Utils.cpp to handle OpenCL exceptions properly */ -#error "This example needs to be built with -DARM_COMPUTE_CL" -#endif /* ARM_COMPUTE_CL */ - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/CLFunctions.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "utils/ImageLoader.h" -#include "utils/Utils.h" - -using namespace arm_compute; -using namespace utils; - -/** Gaussian 3x3 matrix - */ -const std::array<int16_t, 9> gaussian3x3 = -{ - 1, 2, 1, - 2, 4, 2, - 1, 2, 1 -}; - -/** Gaussian 5x5 matrix - */ -const std::array<int16_t, 25> gaussian5x5 = -{ - 1, 4, 6, 4, 1, - 4, 16, 24, 16, 4, - 6, 24, 36, 24, 6, - 4, 16, 24, 16, 4, - 1, 4, 6, 4, 1 -}; - -class CLConvolutionExample : public Example -{ -public: - bool do_setup(int argc, char **argv) override - { - PPMLoader ppm; - - CLScheduler::get().default_init(); - - if(argc < 2) - { - // Print help - std::cout << "Usage: ./build/cl_convolution [input_image.ppm]\n\n"; - std::cout << "No input_image provided, creating a dummy 640x480 image\n"; - // Create an empty grayscale 640x480 image - src.allocator()->init(TensorInfo(640, 480, Format::U8)); - } - else - { - ppm.open(argv[1]); - ppm.init_image(src, Format::U8); - } - - // Configure the temporary and destination images - tmp.allocator()->init(*src.info()); - dst.allocator()->init(*src.info()); - - // Apply a Gaussian 3x3 filter to the source image followed by a Gaussian 5x5: - conv3x3.configure(&src, &tmp, gaussian3x3.data(), 0 /* Let arm_compute calculate the scale */, BorderMode::UNDEFINED); - conv5x5.configure(&tmp, &dst, gaussian5x5.data(), 0 /* Let arm_compute calculate the scale */, BorderMode::UNDEFINED); - - // Allocate all the images - src.allocator()->allocate(); - tmp.allocator()->allocate(); - dst.allocator()->allocate(); - // Fill the input image with the content of the PPM image if a filename was provided: - if(ppm.is_open()) - { - ppm.fill_image(src); - output_filename = std::string(argv[1]) + "_out.ppm"; - } - - return true; - } - void do_run() override - { - // Execute the functions: - conv3x3.run(); - conv5x5.run(); - - // Make sure all the OpenCL jobs are done executing: - CLScheduler::get().sync(); - } - void do_teardown() override - { - // Save the result to file: - if(!output_filename.empty()) - { - save_to_ppm(dst, output_filename); // save_to_ppm maps and unmaps the image to store as PPM - } - } - -private: - CLImage src{}; - CLImage tmp{}; - CLImage dst{}; - CLConvolution3x3 conv3x3{}; - CLConvolution5x5 conv5x5{}; - std::string output_filename{}; -}; - -/** Main program for convolution test - * - * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Path to PPM image to process ) - */ -int main(int argc, char **argv) -{ - return utils::run_example<CLConvolutionExample>(argc, argv); -} diff --git a/examples/cl_events.cpp b/examples/cl_events.cpp deleted file mode 100644 index a9c508ac58..0000000000 --- a/examples/cl_events.cpp +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2017-2018 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL /* Needed by Utils.cpp to handle OpenCL exceptions properly */ -#error "This example needs to be built with -DARM_COMPUTE_CL" -#endif /* ARM_COMPUTE_CL */ - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/CLFunctions.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "utils/ImageLoader.h" -#include "utils/Utils.h" - -using namespace arm_compute; -using namespace utils; - -class CLEventsExample : public Example -{ -public: - bool do_setup(int argc, char **argv) override - { - /** [OpenCL events] **/ - PPMLoader ppm; - constexpr int scale_factor = 2; - - CLScheduler::get().default_init(); - - if(argc < 2) - { - // Print help - std::cout << "Usage: ./build/cl_events [input_image.ppm]\n\n"; - std::cout << "No input_image provided, creating a dummy 640x480 image\n"; - // Create an empty grayscale 640x480 image - src.allocator()->init(TensorInfo(640, 480, Format::U8)); - } - else - { - ppm.open(argv[1]); - ppm.init_image(src, Format::U8); - } - - TensorInfo dst_info(src.info()->dimension(0) / scale_factor, src.info()->dimension(1) / scale_factor, Format::U8); - - // Configure the temporary and destination images - dst.allocator()->init(dst_info); - tmp_scale_median.allocator()->init(dst_info); - tmp_median_gauss.allocator()->init(dst_info); - - //Configure the functions: - scale.configure(&src, &tmp_scale_median, InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::REPLICATE); - median.configure(&tmp_scale_median, &tmp_median_gauss, BorderMode::REPLICATE); - gauss.configure(&tmp_median_gauss, &dst, BorderMode::REPLICATE); - - // Allocate all the images - src.allocator()->allocate(); - dst.allocator()->allocate(); - tmp_scale_median.allocator()->allocate(); - tmp_median_gauss.allocator()->allocate(); - - // Fill the input image with the content of the PPM image if a filename was provided: - if(ppm.is_open()) - { - ppm.fill_image(src); - output_filename = std::string(argv[1]) + "_out.ppm"; - } - /** [OpenCL events] **/ - - return true; - } - void do_run() override - { - // Enqueue and flush the scale OpenCL kernel: - scale.run(); - // Create a synchronisation event between scale and median: - cl::Event scale_event = CLScheduler::get().enqueue_sync_event(); - // Enqueue and flush the median OpenCL kernel: - median.run(); - // Enqueue and flush the Gaussian OpenCL kernel: - gauss.run(); - - //Make sure all the OpenCL jobs are done 
executing: - scale_event.wait(); // Block until Scale is done executing (Median3x3 and Gaussian5x5 might still be running) - CLScheduler::get().sync(); // Block until Gaussian5x5 is done executing - } - void do_teardown() override - { - // Save the result to file: - if(!output_filename.empty()) - { - save_to_ppm(dst, output_filename); // save_to_ppm maps and unmaps the image to store as PPM - } - } - -private: - CLImage src{}, tmp_scale_median{}, tmp_median_gauss{}, dst{}; - CLScale scale{}; - CLMedian3x3 median{}; - CLGaussian5x5 gauss{}; - std::string output_filename{}; -}; - -/** Main program for convolution test - * - * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Path to PPM image to process ) - */ -int main(int argc, char **argv) -{ - return utils::run_example<CLEventsExample>(argc, argv); -} diff --git a/examples/cl_sgemm.cpp b/examples/cl_sgemm.cpp index 8e0263dde2..68955c52f7 100644 --- a/examples/cl_sgemm.cpp +++ b/examples/cl_sgemm.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,9 +26,10 @@ #endif /* ARM_COMPUTE_CL */ #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/CLFunctions.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/CL/CLTuner.h" +#include "arm_compute/runtime/CL/functions/CLGEMM.h" + #include "utils/Utils.h" #include <cstdlib> @@ -50,15 +51,16 @@ public: CLScheduler::get().default_init(&tuner); std::ifstream stream; - if(argc > 1) + if (argc > 1) { stream.open(argv[1], std::fstream::in); } - if(argc < 3 || (argc < 4 && stream.bad())) + if (argc < 3 || (argc < 4 && stream.bad())) { // Print help - std::cout << "Usage: 1) ./build/cl_sgemm input_matrix_1.npy input_matrix_2.npy [input_matrix_3.npy] [alpha = 1] [beta = 0]\n"; + std::cout << "Usage: 1) ./build/cl_sgemm input_matrix_1.npy input_matrix_2.npy [input_matrix_3.npy] [alpha " + "= 1] [beta = 0]\n"; std::cout << " 2) ./build/cl_sgemm M N K [alpha = 1.0f] [beta = 0.0f]\n\n"; std::cout << "Too few or no input_matrices provided. 
Using M=7, N=3, K=5, alpha=1.0f and beta=0.0f\n\n"; @@ -68,29 +70,29 @@ public: } else { - if(stream.good()) /* case file1.npy file2.npy [file3.npy] [alpha = 1.0f] [beta = 0.0f] */ + if (stream.good()) /* case file1.npy file2.npy [file3.npy] [alpha = 1.0f] [beta = 0.0f] */ { npy0.open(argv[1]); npy0.init_tensor(src0, DataType::F32); npy1.open(argv[2]); npy1.init_tensor(src1, DataType::F32); - if(argc > 3) + if (argc > 3) { stream.close(); stream.clear(); stream.open(argv[3], std::fstream::in); - if(stream.good()) /* case with third file */ + if (stream.good()) /* case with third file */ { npy2.open(argv[3]); npy2.init_tensor(src2, DataType::F32); - if(argc > 4) + if (argc > 4) { // Convert string to float alpha = strtof(argv[4], nullptr); - if(argc > 5) + if (argc > 5) { // Convert string to float beta = strtof(argv[5], nullptr); @@ -101,7 +103,7 @@ public: { alpha = strtof(argv[3], nullptr); - if(argc > 4) + if (argc > 4) { beta = strtof(argv[4], nullptr); } @@ -118,11 +120,11 @@ public: src1.allocator()->init(TensorInfo(TensorShape(N, K), 1, DataType::F32)); src2.allocator()->init(TensorInfo(TensorShape(N, M), 1, DataType::F32)); - if(argc > 4) + if (argc > 4) { alpha = strtof(argv[4], nullptr); - if(argc > 5) + if (argc > 5) { beta = strtof(argv[5], nullptr); } @@ -141,7 +143,7 @@ public: dst.allocator()->allocate(); // Fill the input images with either the data provided or random data - if(npy0.is_open()) + if (npy0.is_open()) { npy0.fill_tensor(src0); npy1.fill_tensor(src1); @@ -149,7 +151,7 @@ public: output_filename = "sgemm_out.npy"; is_fortran = npy0.is_fortran(); - if(npy2.is_open()) + if (npy2.is_open()) { src2.allocator()->allocate(); npy2.fill_tensor(src2); @@ -179,7 +181,7 @@ public: } void do_teardown() override { - if(!output_filename.empty()) /* Save to .npy file */ + if (!output_filename.empty()) /* Save to .npy file */ { save_to_npy(dst, output_filename, is_fortran); } diff --git a/examples/gc_absdiff.cpp b/examples/gc_absdiff.cpp deleted file mode 100644 index 6793df040f..0000000000 --- a/examples/gc_absdiff.cpp +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef ARM_COMPUTE_GC /* Needed by Utils.cpp to handle OpenGL ES exceptions properly */ -#error "This example needs to be built with -DARM_COMPUTE_GC" -#endif /* ARM_COMPUTE_GC */ - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/GLES_COMPUTE/GCFunctions.h" -#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" -#include "utils/ImageLoader.h" -#include "utils/Utils.h" - -using namespace arm_compute; -using namespace utils; - -class GCAbsDiffExample : public Example -{ -public: - bool do_setup(int argc, char **argv) override - { - PPMLoader ppm1{}; - PPMLoader ppm2{}; - - GCScheduler::get().default_init(); - if(argc < 2) - { - // Print help - std::cout << "Usage: " << argv[0] << " [input0_image.ppm] [input1_image.ppm] \n\n"; - std::cout << "No input_image provided, creating two dummy 640x480 images\n"; - // Create two empty grayscale 640x480 images - src1.allocator()->init(TensorInfo(640, 480, Format::U8)); - src2.allocator()->init(TensorInfo(640, 480, Format::U8)); - } - else if(argc < 3) - { - // Print help - std::cout << "Usage: " << argv[0] << " [input0_image.ppm] [input1_image.ppm] \n\n"; - std::cout << "Only one input_image provided, creating a dummy 640x480 image\n"; - ppm1.open(argv[1]); - ppm1.init_image(src1, Format::U8); - // Create an empty grayscale 640x480 image - src2.allocator()->init(TensorInfo(640, 480, Format::U8)); - } - else - { - ppm1.open(argv[1]); - ppm1.init_image(src1, Format::U8); - ppm2.open(argv[2]); - ppm2.init_image(src2, Format::U8); - } - - // Configure the temporary and destination images - dst.allocator()->init(*src1.info()); - - absdiff.configure(&src1, &src2, &dst); - - // Allocate all the images - src1.allocator()->allocate(); - src2.allocator()->allocate(); - dst.allocator()->allocate(); - - // Fill the input image with the content of the PPM image if a filename was provided: - if(ppm1.is_open()) - { - ppm1.fill_image(src1); - output_filename = std::string(argv[1]) + "_out.ppm"; - } - if(ppm2.is_open()) - { - ppm2.fill_image(src2); - } - - return true; - } - void do_run() override - { - // Execute the functions: - absdiff.run(); - } - void do_teardown() override - { - // Save the result to file: - if(!output_filename.empty()) - { - // save_to_ppm maps and unmaps the image to store as PPM - // The GCTensor::map call inside the save_to_ppm will block until all pending operations on that image have completed - save_to_ppm(dst, output_filename); - } - } - -private: - GCImage src1{}, src2{}, dst{}; - GCAbsoluteDifference absdiff{}; - std::string output_filename{}; -}; - -/** Main program for absdiff test - * - * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Path to the first PPM image to process, [optional] Path the the second PPM image to process ) - */ -int main(int argc, char **argv) -{ - return utils::run_example<GCAbsDiffExample>(argc, argv); -} diff --git a/examples/gc_dc.cpp b/examples/gc_dc.cpp deleted file mode 100644 index 6d09eba87c..0000000000 --- a/examples/gc_dc.cpp +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_GC -#error "This example needs to be built with -DARM_COMPUTE_GC" -#endif /* ARM_COMPUTE_GC */ - -#include "arm_compute/runtime/GLES_COMPUTE/GCFunctions.h" -#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" -#include "half/half.hpp" -#include "utils/Utils.h" - -using namespace arm_compute; -using namespace utils; - -class GCDCExample : public Example -{ -public: - bool do_setup(int argc, char **argv) override - { - ARM_COMPUTE_UNUSED(argc); - ARM_COMPUTE_UNUSED(argv); - - // init instance - GCScheduler::get().default_init(); - - const TensorShape src_shape = TensorShape{ 11U /* W */, 13U /* H */, 4U /* C */, 3U /* N */ }; - const unsigned int kernel_size = 3; - const int stride_x = 1; - const int stride_y = 1; - const int pad_x = 0; - const int pad_y = 0; - const unsigned int num_kernels = 256; - const DataType data_type = DataType::F16; - - // generate shape - const TensorShape weights_shape(kernel_size, kernel_size, src_shape.z(), num_kernels); - const TensorShape bias_shape(num_kernels); - const PadStrideInfo pad_info(stride_x, stride_y, pad_x, pad_y, DimensionRoundingType::FLOOR); - - // output shape should be 9*11*256*3 (W*H*C*N) - const TensorShape dst_shape = get_output_shape(src_shape, weights_shape, pad_info); - - // create tensors - src.allocator()->init(TensorInfo(src_shape, 1, data_type)); - weights.allocator()->init(TensorInfo(weights_shape, 1, data_type)); - bias.allocator()->init(TensorInfo(bias_shape, 1, data_type)); - dst.allocator()->init(TensorInfo(dst_shape, 1, data_type)); - - // configure layer - conv.configure(&src, &weights, &bias, &dst, pad_info); - - // allocate tensors - src.allocator()->allocate(); - weights.allocator()->allocate(); - bias.allocator()->allocate(); - dst.allocator()->allocate(); - - // To demonstrate how to fill tensor with some values... 
- src.map(); - Window window; - window.use_tensor_dimensions(src_shape); - Iterator it(&src, window); - execute_window_loop(window, [&](const Coordinates &) - { - *reinterpret_cast<half_float::half *>(it.ptr()) = half_float::half(1.f); - }); - src.unmap(); - - return true; - } - void do_run() override - { - // run the layer - conv.run(); - } - void do_teardown() override - { - // check result - dst.map(); - // do something - dst.unmap(); - } - -private: - GCTensor src{}, weights{}, bias{}, dst{}; - GCDirectConvolutionLayer conv{}; - - TensorShape get_output_shape(TensorShape in_shape, TensorShape kernel_shape, const PadStrideInfo &info) - { - TensorShape out_shape(in_shape); - const std::pair<unsigned int, unsigned int> scaled_dims = scaled_dimensions(in_shape.x(), - in_shape.y(), - kernel_shape.x(), - kernel_shape.y(), - info); - out_shape.set(0, scaled_dims.first); - out_shape.set(1, scaled_dims.second); - out_shape.set(2, kernel_shape[3]); - return out_shape; - } -}; - -/** Main program for directconvolution test - * - * @param[in] argc Number of arguments - * @param[in] argv Arguments - */ -int main(int argc, char **argv) -{ - return utils::run_example<GCDCExample>(argc, argv); -} diff --git a/examples/gemm_tuner/CommonGemmExampleOptions.cpp b/examples/gemm_tuner/CommonGemmExampleOptions.cpp index a93d0191b3..c2a465604a 100644 --- a/examples/gemm_tuner/CommonGemmExampleOptions.cpp +++ b/examples/gemm_tuner/CommonGemmExampleOptions.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -34,30 +34,53 @@ using namespace utils; os << "N : " << common_params.N << std::endl; os << "K : " << common_params.K << std::endl; os << "B : " << common_params.B << std::endl; + os << "Data type : " << common_params.data_type << std::endl; + os << "OpenCL tuner mode : " << common_params.tuner_mode << std::endl; return os; } -CommonGemmExampleOptions::CommonGemmExampleOptions(CommandLineParser &parser) +CommonGemmExampleOptions::CommonGemmExampleOptions(arm_compute::utils::CommandLineParser &parser, + arm_compute::DataType default_data_type) : help(parser.add_option<ToggleOption>("help")), M(parser.add_positional_option<SimpleOption<size_t>>("M", 100)), N(parser.add_positional_option<SimpleOption<size_t>>("N", 100)), K(parser.add_positional_option<SimpleOption<size_t>>("K", 50)), - B(parser.add_positional_option<SimpleOption<size_t>>("B", 1)) + B(parser.add_positional_option<SimpleOption<size_t>>("B", 1)), + data_type(), + tuner_mode() { + const std::set<DataType> supported_data_types{ + DataType::F16, + DataType::F32, + DataType::QASYMM8, + }; + + const std::set<CLTunerMode> supported_tuner_modes{CLTunerMode::EXHAUSTIVE, CLTunerMode::NORMAL, CLTunerMode::RAPID}; + + ARM_COMPUTE_ERROR_ON_MSG(supported_data_types.find(default_data_type) == supported_data_types.end(), + "Default data type unsupported"); + + data_type = parser.add_option<EnumOption<DataType>>("type", supported_data_types, default_data_type); + tuner_mode = parser.add_option<EnumOption<CLTunerMode>>("tuner-mode", supported_tuner_modes, CLTunerMode::RAPID); + help->set_help("Show this help message."); M->set_help("Number of lhs matrix rows."); N->set_help("Number of rhs matrix columns."); K->set_help("Number of lhs matrix columns/rhs matrix rows."); B->set_help("Batch size."); + data_type->set_help("Data type to use"); + tuner_mode->set_help("OpenCL tuner mode"); } CommonGemmExampleParams consume_common_gemm_example_parameters(const CommonGemmExampleOptions 
&options) { CommonGemmExampleParams common_params; - common_params.M = options.M->value(); - common_params.N = options.N->value(); - common_params.K = options.K->value(); - common_params.B = options.B->value(); + common_params.M = options.M->value(); + common_params.N = options.N->value(); + common_params.K = options.K->value(); + common_params.B = options.B->value(); + common_params.data_type = options.data_type->value(); + common_params.tuner_mode = options.tuner_mode->value(); return common_params; } } // namespace gemm_tuner diff --git a/examples/gemm_tuner/CommonGemmExampleOptions.h b/examples/gemm_tuner/CommonGemmExampleOptions.h index 5f079abfc1..38178bcef8 100644 --- a/examples/gemm_tuner/CommonGemmExampleOptions.h +++ b/examples/gemm_tuner/CommonGemmExampleOptions.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,18 +24,25 @@ #ifndef ARM_COMPUTE_EXAMPLES_GEMM_TUNER_COMMON_GEMM_EXAMPLE_OPTIONS #define ARM_COMPUTE_EXAMPLES_GEMM_TUNER_COMMON_GEMM_EXAMPLE_OPTIONS +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/runtime/CL/CLTuner.h" + #include "utils/command_line/CommandLineOptions.h" #include "utils/command_line/CommandLineParser.h" +#include "utils/TypePrinter.h" namespace gemm_tuner { /** Structure holding all the common gemm example parameters */ struct CommonGemmExampleParams { - size_t M{ 100 }; /**< Number of lhs matrix rows */ - size_t N{ 100 }; /**< Number of rhs matrix columns */ - size_t K{ 50 }; /**< Number of lhs matrix columns/rhs matrix rows */ - size_t B{ 1 }; /**< Batch size */ + size_t M{100}; /**< Number of lhs matrix rows */ + size_t N{100}; /**< Number of rhs matrix columns */ + size_t K{50}; /**< Number of lhs matrix columns/rhs matrix rows */ + size_t B{1}; /**< Batch size */ + arm_compute::DataType data_type{arm_compute::DataType::F32}; /**< Data type */ + arm_compute::CLTunerMode tuner_mode{arm_compute::CLTunerMode::RAPID}; /**< OpenCL tuner mode */ }; /** Formatted output of the CommonGemmExampleParams type @@ -61,9 +68,11 @@ class CommonGemmExampleOptions public: /** Constructor * - * @param[in,out] parser A parser on which "parse()" hasn't been called yet. + * @param[in,out] parser A parser on which "parse()" hasn't been called yet. + * @param[in] default_data_type Default data type if unspecified. 
*/ - CommonGemmExampleOptions(arm_compute::utils::CommandLineParser &parser); + CommonGemmExampleOptions(arm_compute::utils::CommandLineParser &parser, + arm_compute::DataType default_data_type = arm_compute::DataType::F32); /** Prevent instances of this class from being copied (As this class contains pointers) */ CommonGemmExampleOptions(const CommonGemmExampleOptions &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ @@ -80,6 +89,8 @@ public: arm_compute::utils::SimpleOption<size_t> *N; /**< Number of rhs matrix columns option */ arm_compute::utils::SimpleOption<size_t> *K; /**< Number of lhs matrix columns/rhs matrix rows option */ arm_compute::utils::SimpleOption<size_t> *B; /**< Batch size option */ + arm_compute::utils::EnumOption<arm_compute::DataType> *data_type; /**< Data type */ + arm_compute::utils::EnumOption<arm_compute::CLTunerMode> *tuner_mode; /**< OpenCL tuner mode */ }; /** Consumes the common gemm example options and creates a structure containing all information diff --git a/examples/gemm_tuner/GemmTuner.py b/examples/gemm_tuner/GemmTuner.py index 29c414cfe8..ef1f31493e 100644 --- a/examples/gemm_tuner/GemmTuner.py +++ b/examples/gemm_tuner/GemmTuner.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019 ARM Limited. +# Copyright (c) 2019-2020 Arm Limited. # # SPDX-License-Identifier: MIT # @@ -41,18 +41,21 @@ from typing import Deque, Dict, Generator, List, NamedTuple, Set, Tuple, Union Strategy = Enum("Strategy", ["Native", "ReshapedOnlyRHS", "Reshaped"]) # Gemm parameter + + class GEMMParam(NamedTuple): M: int # Number of lhs matrix rows N: int # Number of rhs matrix columns K: int # Number of lhs matrix columns/rhs matrix rows B: int # Batch size + data_type: str # Data type - @staticmethod - def parse_from_strs(*args): - return GEMMParam(*map(int, args)) + @classmethod + def parse_from_strs(cls, *M_N_K_B, data_type): + return cls(*map(int, M_N_K_B), str(data_type)) def __str__(self): - return "-".join(map(str, self)) + return ",".join(map(str, self)) # Gemm configuration for strategy Native @@ -61,13 +64,13 @@ class NativeGEMMConfig(NamedTuple): n0: int # Number of columns processed by the matrix multiplication k0: int # Number of partial accumulations performed by the matrix multiplication - @staticmethod - def parse_from_strs(*args): - *mnk, = map(int, args) - return NativeGEMMConfig(*mnk) + @classmethod + def parse_from_strs(cls, *args): + (*mnk,) = map(int, args) + return cls(*mnk) def __str__(self): - return "-".join(map(str, self)) + return ",".join(map(str, self)) # Gemm configuration for strategy Reshaped Only RHS @@ -75,19 +78,25 @@ class ReshapedOnlyRHSGEMMConfig(NamedTuple): m0: int # Number of rows processed by the matrix multiplication n0: int # Number of columns processed by the matrix multiplication k0: int # Number of partial accumulations performed by the matrix multiplication - h0: int # Number of horizontal blocks of size (k0xn0) stored on the same output row - interleave_rhs: bool # Interleave rhs matrix (1) / Do not interleave rhs matrix (0) - transpose_rhs: bool # Transpose rhs matrix but not lhs matrix (1) / Do not transpose rhs matrix but do transpose lhs matrix (0) - - @staticmethod - def parse_from_strs(*args): - *mnkh, interleave_rhs, transpose_rhs = map(int, args) + # Number of horizontal blocks of size (k0xn0) stored on the same output row + h0: int + # Interleave rhs matrix (1) / Do not interleave rhs matrix (0) + interleave_rhs: bool + # Transpose rhs matrix but not lhs matrix (1) / Do not 
transpose rhs matrix but do transpose lhs matrix (0) + transpose_rhs: bool + # Export rhs matrix to cl_image (1) / Do not export rhs matrix to cl_image (0) + export_to_cl_image_rhs: bool + + @classmethod + def parse_from_strs(cls, *args): + (*mnkh, interleave_rhs, transpose_rhs, export_to_cl_image_rhs,) = map(int, args) interleave_rhs = interleave_rhs == 1 transpose_rhs = transpose_rhs == 1 - return ReshapedOnlyRHSGEMMConfig(*mnkh, interleave_rhs, transpose_rhs) + export_to_cl_image_rhs = export_to_cl_image_rhs == 1 + return cls(*mnkh, interleave_rhs, transpose_rhs, export_to_cl_image_rhs) def __str__(self): - return "-".join(map(str, self)) + return ",".join(map(str, self)) # Gemm configuration for strategy Reshaped @@ -95,55 +104,90 @@ class ReshapedGEMMConfig(NamedTuple): m0: int # Number of rows processed by the matrix multiplication n0: int # Number of columns processed by the matrix multiplication k0: int # Number of partial accumulations performed by the matrix multiplication - v0: int # Number of vertical blocks of size (m0xk0) stored on the same output row - h0: int # Number of horizontal blocks of size (k0xn0) stored on the same output row - interleave_lhs: bool # Interleave lhs matrix (1) / Do not interleave lhs matrix (0) - interleave_rhs: bool # Interleave rhs matrix (1) / Do not interleave rhs matrix (0) - transpose_rhs: bool # Transpose rhs matrix but not lhs matrix (1) / Do not transpose rhs matrix but do transpose lhs matrix (0) - - @staticmethod - def parse_from_strs(*args): - *mnkvh, interleave_lhs, interleave_rhs, transpose_rhs = map(int, args) + # Number of vertical blocks of size (m0xk0) stored on the same output row + v0: int + # Number of horizontal blocks of size (k0xn0) stored on the same output row + h0: int + # Interleave lhs matrix (1) / Do not interleave lhs matrix (0) + interleave_lhs: bool + # Interleave rhs matrix (1) / Do not interleave rhs matrix (0) + interleave_rhs: bool + # Transpose rhs matrix but not lhs matrix (1) / Do not transpose rhs matrix but do transpose lhs matrix (0) + transpose_rhs: bool + # Export rhs matrix to cl_image (1) / Do not export rhs matrix to cl_image (0) + export_to_cl_image_rhs: bool + + @classmethod + def parse_from_strs(cls, *args): + (*mnkvh, interleave_lhs, interleave_rhs, transpose_rhs, export_to_cl_image_rhs,) = map(int, args) interleave_lhs = interleave_lhs == 1 interleave_rhs = interleave_rhs == 1 transpose_rhs = transpose_rhs == 1 - return ReshapedGEMMConfig(*mnkvh, interleave_lhs, interleave_rhs, transpose_rhs) + export_to_cl_image_rhs = export_to_cl_image_rhs == 1 + return cls(*mnkvh, interleave_lhs, interleave_rhs, transpose_rhs, export_to_cl_image_rhs) def __str__(self): - return "-".join(map(str, self)) + return ",".join(map(str, self)) # Measurement we take from the benchmark result. 
class Measurement(NamedTuple): - opencl_timer_ms: float + opencl_timer_ms_reshape: float + opencl_timer_ms_kernel: float + + def get_total_ms(self): + return self.opencl_timer_ms_reshape + self.opencl_timer_ms_kernel def is_close_to(self, other, tol): - return math.fabs(self.opencl_timer_ms - other.opencl_timer_ms) < tol + return math.fabs(self.get_total_ms() - other.get_total_ms()) < tol def is_better_than(self, other, tol): - return self < other and not self.is_close_to(other) + return self.get_total_ms() < other.get_total_ms() and not self.is_close_to( + other + ) def __add__(self, other): - return Measurement(self.opencl_timer_ms + other.opencl_timer_ms) + return Measurement( + self.opencl_timer_ms_reshape + other.opencl_timer_ms_reshape, + self.opencl_timer_ms_kernel + other.opencl_timer_ms_kernel, + ) def __sub__(self, other): - return Measurement(self.opencl_timer_ms - other.opencl_timer_ms) + return Measurement( + self.opencl_timer_ms_reshape - other.opencl_timer_ms_reshape, + self.opencl_timer_ms_kernel - other.opencl_timer_ms_kernel, + ) def __mul__(self, other): - return Measurement(self.opencl_timer_ms * other.opencl_timer_ms) + return Measurement( + self.opencl_timer_ms_reshape * other.opencl_timer_ms_reshape, + self.opencl_timer_ms_kernel * other.opencl_timer_ms_kernel, + ) def __floordiv__(self, other): - return Measurement(self.opencl_timer_ms // other.opencl_timer_ms) + return Measurement( + self.opencl_timer_ms_reshape // other.opencl_timer_ms_reshape, + self.opencl_timer_ms_kernel // other.opencl_timer_ms_kernel, + ) def __truediv__(self, other): - return Measurement(self.opencl_timer_ms / other.opencl_timer_ms) + return Measurement( + self.opencl_timer_ms_reshape / other.opencl_timer_ms_reshape, + self.opencl_timer_ms_kernel / other.opencl_timer_ms_kernel, + ) def __pow__(self, power): - return Measurement(self.opencl_timer_ms ** power) + return Measurement( + self.opencl_timer_ms_reshape ** power, self.opencl_timer_ms_kernel ** power + ) + + def __str__(self): + return ",".join(map(str, self)) # GEMMConfig Type -GEMMConfigT = Union[NativeGEMMConfig, ReshapedOnlyRHSGEMMConfig, ReshapedGEMMConfig] +GEMMConfigT = Union[NativeGEMMConfig, + ReshapedOnlyRHSGEMMConfig, ReshapedGEMMConfig] # Representation of the benchmark result from a single experiment @@ -154,24 +198,6 @@ class BenchmarkResult(NamedTuple): measurement: Measurement -# Representation of a single row of BenchmarkResult in CSV -# NOTE: In the CSV representation, we merge all fields of Gemm Config into a single field "GEMMConfig", but keep the -# fields of GEMMParam and Measurement -# The example entry including header would look like: -# M , N , K , B, Strategy , GEMMConfig , OpenCLTimer_MS -# 1225, 32, 192, 1, Reshaped , 4-4-4-3-1-1-1-0 , 0.3309 -BenchmarkResultCSVRow = namedtuple( - "BenchmarkResultCSVRow", GEMMParam._fields + ("Strategy", "GEMMConfig") + Measurement._fields -) - - -def benchmark_result_2_csv_row(result: BenchmarkResult) -> BenchmarkResultCSVRow: - """ Convert a BenchmarkResult into its CSV row form """ - return BenchmarkResultCSVRow( - *(result.gemm_param + (result.strategy.name, str(result.gemm_config)) + result.measurement) - ) - - class GEMMBenchmarkResultRecorder: """ A recorder that records and organises GEMM Benchmark results, and produces various reports on the record. 
""" @@ -210,7 +236,9 @@ class GEMMBenchmarkResultRecorder: best_gc_set = best_gc_sets.setdefault((gemm_param, strategy), []) best_gc_set.append((gemm_config, measurement)) # Sort the best config set (list) - best_gc_set = sorted(best_gc_set, key=lambda gc_and_m: gc_and_m[1]) + best_gc_set = sorted( + best_gc_set, key=lambda gc_and_m: gc_and_m[1].get_total_ms() + ) # Filter out configs that are beyond tolerance to the best GEMMConfig's measurement best_gc, best_m = best_gc_set[0] best_gc_set_new = [ @@ -228,9 +256,14 @@ class GEMMBenchmarkResultRecorder: """ Get the best GEMMConfig set per GEMMParam per Strategy, and flatten the result into a sequence of BenchmarkResults """ - for (gemm_param, strategy), best_gc_sets in self.get_best_gemm_configs().items(): + for ( + (gemm_param, strategy), + best_gc_sets, + ) in self.get_best_gemm_configs().items(): for best_gemm_config, best_measurement in best_gc_sets: - yield BenchmarkResult(gemm_param, strategy, best_gemm_config, best_measurement) + yield BenchmarkResult( + gemm_param, strategy, best_gemm_config, best_measurement + ) def get_config_distributions(self): """ Return GEMMConfigDistribution for each strategy @@ -244,38 +277,72 @@ class GEMMBenchmarkResultRecorder: return gemm_config_distributions - def save_to_csvs(self, out_dir, only_best_config=True): - """ Save records to an output directory of csv files. - The directory is organized such that each strategy gets its own CSV file. + def get_best_gemm_strategies(self): + """ Get the best Stratey per GEMMParam + """ + all_results: Dict[GEMMParam, List[Tuple[Strategy, Measurement]]] = defaultdict( + list + ) + + best_strategies: Dict[GEMMParam, Strategy] = {} + + for gemm_param, strategy, gemm_config, measurement in self.get_record(): + all_results[gemm_param].append((strategy, measurement)) + + for gemm_param, results_set in all_results.items(): + # Sort the best results set (list) + results_set = sorted( + results_set, key=lambda s_and_m: s_and_m[1].get_total_ms() + ) + # Select best Strategy + best_s, best_m = results_set[0] + best_strategies[gemm_param] = best_s + + return best_strategies + + def save_to_jsons(self, out_dir, only_best_config=True): + """ Save records to an output directory of JSON files. + The directory is organized such that each strategy gets its own JSON file. + The directory also includes a JSON file to define the best strategy per GEMM Param. """ if not os.path.exists(out_dir): - logging.info("Output directory {} does not exist. Creating...".format(out_dir)) + logging.info( + "Output directory {} does not exist. Creating...".format( + out_dir) + ) os.mkdir(out_dir) + + out_json_path = os.path.join(out_dir, "gemm_type_selection.json") + if check_out_path(out_json_path): + results = self.get_best_gemm_strategies() + results = {str(key): value.name for key, value in results.items()} + dump_json(out_json_path, results) + for strategy in self._strategies: - out_csv_path = os.path.join(out_dir, strategy.name) - if os.path.exists(out_csv_path): - overwrite = ( - input( - "Output CSV {} already exists. Overwrite? 
[Y/N]: ".format(out_csv_path) - ).lower() - == "y" - ) - if not overwrite: - logging.info("Skipping {}".format(out_csv_path)) - continue - logging.info("Saving csv file to {}".format(out_csv_path)) - record = ( - self.get_best_gemm_configs_as_sequence() if only_best_config else self.get_record() + out_json_path = os.path.join( + out_dir, ("gemm_config_" + strategy.name.lower() + ".json") ) - with open(out_csv_path, "w") as f: - csv_writer = csv.DictWriter(f, fieldnames=BenchmarkResultCSVRow._fields) - csv_writer.writeheader() - csv_writer.writerows( - benchmark_result_2_csv_row(res)._asdict() - for res in record - if res.strategy == strategy + if check_out_path(out_json_path): + record = ( + self.get_best_gemm_configs_as_sequence() + if only_best_config + else self.get_record() ) - logging.info("Saved") + results = defaultdict(list) + for res in record: + if res.strategy == strategy: + results[str(res.gemm_param)].append( + { + "GEMMConfig": str(res.gemm_config), + "OpenCL_Timer_ms_reshape": str( + res.measurement.opencl_timer_ms_reshape + ), + "OpenCL_Timer_ms_kernel": str( + res.measurement.opencl_timer_ms_kernel + ), + } + ) + dump_json(out_json_path, results) def summary(self, sum_level=SummaryLevel.Short): """ Return the summary string of the record @@ -314,9 +381,9 @@ class GEMMConfigDistribution: def __init__(self): """ Initializer """ - self._gemm_config_dist: Dict[GEMMConfig, List[Tuple[GEMMParam, Measurement]]] = defaultdict( - list - ) + self._gemm_config_dist: Dict[ + GEMMConfig, List[Tuple[GEMMParam, Measurement]] + ] = defaultdict(list) self._gemm_config_freq = Counter() def add(self, benchmark_result: BenchmarkResult): @@ -376,14 +443,15 @@ EXAMPLE_FILE_2_STRATEGY = { # GEMMParam + GEMMConfig # in that order. # For example, the example args of running a reshaped rhs only example could be: -# 100,100,100,1, 4, 4, 4, 1, 1, 1 -# M ,N ,K, B,m0,n0,k0,h0,interleave_rhs,transpose_rhs -# <-GEMMParam-><-------------GEMMConfig--------------> +# 100,100,100,1, 4, 4, 4, 1, 1, 1, 0 +# M ,N ,K, B,m0,n0,k0,h0,interleave_rhs,transpose_rhs,export_to_cl_image_rhs +# <-GEMMParam-><-------------GEMMConfig---------------------------------------> # Note that the test strategy_name == strategy.name is in place to avoid unwanted enum aliases GEMM_EXAMPLE_ARGS_FACTORY = { + # We ignore the data type field from GEMMParam as that is extracted separately strategy: namedtuple( "{}_Gemm_Example_Args".format(strategy_name), - GEMMParam._fields + GEMM_CONFIG_FACTORY[strategy]._fields, + GEMMParam._fields[:-1] + GEMM_CONFIG_FACTORY[strategy]._fields, ) for strategy_name, strategy in Strategy.__members__.items() if strategy_name == strategy.name @@ -398,8 +466,11 @@ BENCHMARK_RESULT_JSON_EXTENSION = "gemmtuner_benchmark" def parse_benchmark_commandline(commandline: str) -> Dict[str, str]: - """ Parse the benchmark example command-line string into a dictionary of command-line agruments + """ Parse the benchmark example command-line string into a dictionary of command-line arguments """ + # Separate the data type option from the example_args portion of the string + commandline = commandline.replace(",--type=", " --type=") + args = commandline.split() # Discard program name args = args[1:] @@ -439,30 +510,47 @@ def extract_benchmark_results( # Get gemm params + gemm configs from example args benchmark_args = parse_benchmark_commandline(json_res["CommandLine"]) Gemm_Example_Args_T = GEMM_EXAMPLE_ARGS_FACTORY[strategy] - example_args = Gemm_Example_Args_T(*(benchmark_args["example_args"].split(","))) + 
example_args = Gemm_Example_Args_T( + *(benchmark_args["example_args"].split(","))) # Gemm_Example_Arg consists of GEMMParam first and then GEMMConfig (in that order) - gemm_param_fields_len = len(GEMMParam._fields) - gemm_param = GEMMParam.parse_from_strs(*example_args[:gemm_param_fields_len]) + # However data type option is parsed separately from end of options, hence -1 is applied to fields length + gemm_param_fields_len = len(GEMMParam._fields) - 1 + gemm_param = GEMMParam.parse_from_strs( + *example_args[:gemm_param_fields_len], + data_type = benchmark_args["type"]) GEMMConfig = GEMM_CONFIG_FACTORY[strategy] - gemm_config = GEMMConfig.parse_from_strs(*example_args[gemm_param_fields_len:]) + gemm_config = GEMMConfig.parse_from_strs( + *example_args[gemm_param_fields_len:]) # Get OpenCL_Time_Ms stats measurements = list(example_test_data["measurements"].items()) - # There should only be 1 instrument per run - assert len(measurements) == 1 - measurement_instrument, data = measurements.pop() - # Get instrument name and assert that it is the one we expect - measurement_instrument_name = measurement_instrument.split("/")[0] - assert measurement_instrument_name == "OpenCLTimer" - # Take either the minimum or the average of the raw data as the measurement value - if measurement_method == "min": - measurement_val = min(data["raw"]) - elif measurement_method == "avg": - measurement_val = sum(data["raw"]) / len(data["raw"]) - else: - raise ValueError("Invalid measurement method: {}".format(measurement_method)) - - measurement = Measurement(measurement_val) + # For reshaped RHS only we have two measurements (one also for the reshape kernel) + # Hence we must parse and sum them + measurement_ms_reshape = 0 + measurement_ms_kernel = 0 + for single_measurement in measurements: + measurement_instrument, data = single_measurement + # Get instrument name and assert that it is the one we expect + measurement_instrument_name = measurement_instrument.split("/")[0] + assert measurement_instrument_name == "OpenCLTimer" + # Take either the minimum or the average of the raw data as the measurement value + if measurement_method == "min": + measurement_val = min(data["raw"]) + elif measurement_method == "avg": + measurement_val = sum(data["raw"]) / len(data["raw"]) + else: + raise ValueError( + "Invalid measurement method: {}".format(measurement_method) + ) + + measurement_type = measurement_instrument.split("/")[1] + if "reshape" in measurement_type.split("_"): + measurement_ms_reshape = measurement_val + else: + measurement_ms_kernel = measurement_val + + measurement = Measurement( + measurement_ms_reshape, measurement_ms_kernel) yield BenchmarkResult(gemm_param, strategy, gemm_config, measurement) @@ -475,15 +563,42 @@ def parse_json(dir_name): yield json.load(res_fp) +def check_out_path(out_path): + if os.path.exists(out_path): + overwrite = ( + input( + "Output JSON {} already exists. Overwrite? 
[Y/N]: ".format( + out_path) + ).lower() + == "y" + ) + if not overwrite: + logging.info("Skipping {}".format(out_path)) + return False + logging.info("Saving JSON file to {}".format(out_path)) + return True + + +def dump_json(out_path, dict): + with open(out_path, "w") as f: + json.dump(dict, f) + logging.info("Saved") + + ################################################################################ # Main ################################################################################ def main(args): - logging.info("Searching best gemm configurations from {}".format(args.benchmark_results_dir)) + logging.info( + "Searching best gemm configurations from {}".format( + args.benchmark_results_dir) + ) - benchmark_results = extract_benchmark_results(parse_json(args.benchmark_results_dir)) + benchmark_results = extract_benchmark_results( + parse_json(args.benchmark_results_dir) + ) # Add all benchmark results to the recorder benchmark_result_recorder = GEMMBenchmarkResultRecorder(tol=args.tolerance) @@ -496,7 +611,8 @@ def main(args): recorder_sum_level = GEMMBenchmarkResultRecorder.SummaryLevel.Short # Print overall summary of the recorded results - logging.info(benchmark_result_recorder.summary(sum_level=recorder_sum_level)) + logging.info(benchmark_result_recorder.summary( + sum_level=recorder_sum_level)) # Get GEMM configuration distributions for each strategy all_config_dists = benchmark_result_recorder.get_config_distributions() @@ -508,12 +624,16 @@ def main(args): for config, freq in config_dist.frequency(): logging.debug("{}, {}".format(config, freq)) logging.info( - "Best GEMM Config: {} with std: {}".format(config_dist.best_config(), config_dist.std()) + "Best GEMM Config: {} with std: {}".format( + config_dist.best_config(), config_dist.std() + ) ) - # Save the recorded results to csv files in output directory + # Save the recorded results to JSON files in output directory if args.output_dir is not None: - benchmark_result_recorder.save_to_csvs(args.output_dir, only_best_config=(not args.debug)) + benchmark_result_recorder.save_to_jsons( + args.output_dir, only_best_config=(not args.debug) + ) if __name__ == "__main__": @@ -538,7 +658,7 @@ if __name__ == "__main__": metavar="PATH", action="store", type=str, - help="Path to directory that holds output csv files. One per strategy", + help="Path to directory that holds output JSON files. One for strategy selection and one per strategy for GEMM config selection", ) parser.add_argument( "-t", @@ -550,7 +670,11 @@ if __name__ == "__main__": milliseconds. Recommended value: <= 0.1 ms", ) parser.add_argument( - "-D", "--debug", dest="debug", action="store_true", help="Enable script debugging output" + "-D", + "--debug", + dest="debug", + action="store_true", + help="Enable script debugging output", ) args = parser.parse_args() logging_level = logging.DEBUG if args.debug else logging.INFO diff --git a/examples/gemm_tuner/GemmTunerHelpers.h b/examples/gemm_tuner/GemmTunerHelpers.h new file mode 100644 index 0000000000..dbff9e2dff --- /dev/null +++ b/examples/gemm_tuner/GemmTunerHelpers.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2020-2021 Arm Limited. 
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef EXAMPLES_GEMMTUNERHELPERS_H
+#define EXAMPLES_GEMMTUNERHELPERS_H
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+
+namespace examples
+{
+namespace gemm_tuner_helpers
+{
+bool update_padding_for_cl_image(arm_compute::ITensorInfo *tensor)
+{
+    // Each cl_image pixel holds 4 floats
+    constexpr unsigned int num_floats_per_pixel = 4;
+
+    const unsigned int stride_y_in_elements = tensor->strides_in_bytes()[1] / tensor->element_size();
+    const unsigned int pixel_alignment =
+        arm_compute::get_cl_image_pitch_alignment(arm_compute::CLKernelLibrary::get().get_device());
+    if (pixel_alignment == 0)
+    {
+        return false;
+    }
+    // Round the row stride up to the next multiple of the device's row-pitch alignment
+    const unsigned int row_pitch_alignment = pixel_alignment * num_floats_per_pixel;
+    const unsigned int round_up_width =
+        ((stride_y_in_elements + row_pitch_alignment - 1) / row_pitch_alignment) * row_pitch_alignment;
+    const unsigned int padding = round_up_width - stride_y_in_elements;
+
+    tensor->extend_padding(arm_compute::PaddingSize(0, padding, 0, 0));
+    return true;
+}
+} // namespace gemm_tuner_helpers
+} // namespace examples
+
+#endif /* EXAMPLES_GEMMTUNERHELPERS_H */
diff --git a/examples/gemm_tuner/README.md b/examples/gemm_tuner/README.md
index a4cde10403..aae803eabb 100644
--- a/examples/gemm_tuner/README.md
+++ b/examples/gemm_tuner/README.md
@@ -2,36 +2,101 @@
 ## Introduction

-This is a set of 2 script tools for tuning the performance of OpenCL GEMM kernels (limited to Convolution layer
-functions only for now). Specifically, we tune 3 GEMM kernels, each has a different implementation **strategy** of the
-GEMM operation: **native**, **reshaped**, **reshaped only rhs**. The details of these strategies can be found in the
-documentations of the corresponding kernels: **CLGEMMMatrixMultiplyNativeKernel**,
-**CLGEMMMatrixMultiplyReshapedKernel** and **CLGEMMMatrixMultiplyReshapedOnlyRHSKernel**.
-
-The outputs of the tuning process are 1 optimal configuration (called **GEMM Configuration** or **GEMMConfig**, for
-more details see Approach section) for each of the 3 strategies.
+This is a set of tools for tuning the performance of OpenCL GEMM kernels. Specifically, we tune 3 GEMM kernels, each
+of which implements a different **strategy** of the GEMM operation: **native**, **reshaped**, **reshaped only rhs**.
+The details of these strategies can be found in the documentation of the corresponding kernels:
+**CLGEMMMatrixMultiplyNativeKernel**, **CLGEMMMatrixMultiplyReshapedKernel** and
+**CLGEMMMatrixMultiplyReshapedOnlyRHSKernel**.
+
+The Tuner consists of 2 scripts and 3 binaries:
+* cl_gemm_benchmark and GemmTuner.py under examples/gemm_tuner, and
+* benchmark_cl_gemm_native, benchmark_cl_gemm_reshaped_rhs_only and benchmark_cl_gemm_reshaped under
+  build/tests/gemm_tuner (you'll need to build the library first)
+
+The input to the Tuner is a list of 4-valued tuples we call **GEMM shapes** or **GEMMParams** (M, N, K, B, and possibly
+a data type). They define the "shape" and other parameters (e.g. the data type) of a GEMM operation:
+```
+LHS x RHS = DST
+```
+where LHS is of shape MxK, RHS is of shape KxN, DST is of shape MxN, and B is the batch size.
+
+The outputs of the tuning process are 4 json files:
+1. gemm_type_selection.json: selects which kernel type is best for each GEMMParam
+2. gemm_config_native.json: selects a list of best **GEMMConfigs** of the native kernel for each GEMMParam
+3. gemm_config_reshapedonlyrhs.json: selects a list of best GEMMConfigs of the reshaped_only_rhs kernel for each GEMMParam
+4. gemm_config_reshaped.json: selects a list of best GEMMConfigs of the reshaped kernel for each GEMMParam
+
+These 4 files are the current representation we use for what we call the **heuristics** of a GEMM op: given a GEMMParam,
+which kernel and, subsequently, which configurations for that kernel are the most performant.
+
+## Step-by-step example
+
+### Step1: Prepare the shape and configs files
+1. We first need to identify the shapes that we are interested in and store them in a csv file, say *gemm_shapes.csv*.
+2. Then we need to specify a set of good GEMMConfig candidates for each kernel in 3 separate csv files (this requires
+   some prior heuristics, but these can be provided by the Compute Library developers upon request, based on your target device).
+
+   Say we have *gemm_configs_native.csv*, *gemm_configs_reshaped.csv* and *gemm_configs_reshaped_only_rhs.csv*.
+
+   Please refer to the Prerequisite section for more details.
+
+### Step2: Push relevant files to the target device
+All the files that need to be present on the target device are:
+* benchmark script: \<ComputeLibrary\>/examples/gemm_tuner/cl_gemm_benchmark
+* shapes and configs csv files: gemm_shapes.csv, gemm_configs_native.csv, gemm_configs_reshaped_only_rhs.csv, gemm_configs_reshaped.csv
+* Example benchmark binaries: \<ComputeLibrary\>/build/tests/gemm_tuner/benchmark_cl_gemm*
+
+### Step3: Collect benchmark data
+With these files on device, we can collect benchmark data using the script. Assume all the example binaries are pushed
+to a folder called *gemm_tuner*.
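For reference, both kinds of input file are plain comma-separated rows. A minimal illustration follows (the rows are
example values drawn from the formats described under Prerequisite, not tuned recommendations; the comment lines are
annotation only):
```
# gemm_shapes.csv: one gemm shape <M,N,K,B> per row
1225,32,192,1
# gemm_configs_reshaped_only_rhs.csv: <m0,n0,k0,h0,interleave_rhs,transpose_rhs,export_to_cl_image_rhs>
4,4,4,1,1,1,0
```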
While logged onto our device:
+```
+# Native
+./cl_gemm_benchmark -s native -e ./gemm_tuner -g ./gemm_shapes.csv -c ./gemm_configs_native.csv -o results/native
+# Reshaped Only RHS
+./cl_gemm_benchmark -s reshaped_rhs_only -e ./gemm_tuner -g ./gemm_shapes.csv -c ./gemm_configs_reshaped_only_rhs.csv -o results/reshaped_only_rhs
+# Reshaped
+./cl_gemm_benchmark -s reshaped -e ./gemm_tuner -g ./gemm_shapes.csv -c ./gemm_configs_reshaped.csv -o results/reshaped
+```
+You can repeat the 3 commands above to add a bit of redundancy to your benchmark data (as you can imagine, measurement is noisy),
+but you may need to change the output folder for each repeat.
+
+It is also possible to split the benchmark phase among different platforms, using the **-i** and **-n** options to
+specify the starting experiment and the number of benchmarks to run, as in the following example:
+
+```
+# Reshaped benchmark on 3 different platforms
+## Platform 1
+./cl_gemm_benchmark -s reshaped -e ./gemm_tuner -g ./gemm_shapes.csv -c ./gemm_configs_reshaped.csv -o results/reshaped -i 0 -n 8
+## Platform 2
+./cl_gemm_benchmark -s reshaped -e ./gemm_tuner -g ./gemm_shapes.csv -c ./gemm_configs_reshaped.csv -o results/reshaped -i 8 -n 8
+## Platform 3
+./cl_gemm_benchmark -s reshaped -e ./gemm_tuner -g ./gemm_shapes.csv -c ./gemm_configs_reshaped.csv -o results/reshaped -i 16 -n 8
+```
+
+### Step4: Generate the heuristics
+1. After benchmarking, we pull the benchmark data, the *results* folder, from the target device to our host machine.
+2. We use the GemmTuner.py script to generate the heuristics:
+   ```
+   python3 <ComputeLibrary>/examples/gemm_tuner/GemmTuner.py -b ./results -o heuristics
+   ```
+   When it's finished, there should be 4 json files in the *heuristics* folder.

-## Location
-The 2 scripts **benchmark_gemm_examples.sh** and **GemmTuner.py** can be found under $ACL_ROOT/examples/gemm_tuner.
+One thing to notice is that the config heuristics might give more than 1 recommendation for each GEMMParam, because
+we accept all good GEMMConfigs within a tolerance. If you want fewer recommendations, you can decrease the tolerance by
+passing a lower value to GemmTuner.py via *-t \<tolerance\>*.

-## Pre-requisite
+## Prerequisite
 * A target device to be tuned, plus the following on the device:
   * Android or Linux OS
   * Bash shell
-  * Built ACL with benchmark examples binaries
-  * benchmark_gemm_examples.sh script
+  * Built Compute Library with benchmark examples binaries
+  * cl_gemm_benchmark script
   * gemm shape file

     A csv file containing the **GEMMParam search list**. This is the list of GEMMParams/gemm shapes that we're
-    interested in (For more details see Approach section). The default list is prepared by ACL developers in advance
+    interested in (for more details see the Introduction section). The default list is prepared by Compute Library developers in advance
     and can be provided on request.

     The format is as follows:

-    A headerless csv file with fields separated by commas and commas only (there cannot be whitespaces around each
-    field).
-
-    Note also comments and extraneous empty lines are not permitted.
+    A headerless csv file with fields separated by commas.

     A gemm shape is a list of 4 positive integers \<M, N, K, B\> describing the shapes
     of the two matrices (LHS and RHS) with:
@@ -50,14 +115,14 @@ The 2 scripts **benchmark_gemm_examples.sh** and **GemmTuner.py** can be found u
 * gemm config file

   A csv file containing the **GEMMConfig search list**. This is the list of candidate GEMMConfigs among which we
   search for the optimal one.
**Note that we have a different list for each strategy.** - The default lists are prepared by ACL developers in advance and can be provided on request. + The default lists are prepared by Compute Library developers in advance and can be provided on request. The format of the file for each strategy is the same: - A headerless csv file with fields separated by commas and commas only (there cannot be whitespaces around each - field). Note also comments and extraneous empty lines are not permitted. + A headerless csv file with fields separated by commas. However the fields of GEMMConfig differ for each strategy: + * Strategy **native**: A gemm config is a list of 3 positive integers \<m0, n0, k0\>, with: @@ -78,9 +143,7 @@ The 2 scripts **benchmark_gemm_examples.sh** and **GemmTuner.py** can be found u ... ``` * Strategy **reshaped_rhs_only**: - - A gemm config is a list of 4 positive integers \<m0, n0, k0, h0\> and 2 boolean values interleave_rhs and - transpose_rhs, with: + A gemm config is a list of 4 positive integers <m0, n0, k0, h0> and 3 boolean values: m0 - Number of rows processed by the matrix multiplication n0 - Number of columns processed by the matrix multiplication @@ -88,6 +151,9 @@ The 2 scripts **benchmark_gemm_examples.sh** and **GemmTuner.py** can be found u h0 - Number of horizontal blocks of size (k0xn0) stored on the same output row interleave_rhs - Interleave rhs matrix (1) / Do not interleave rhs matrix (0) transpose_rhs - Transpose rhs matrix (1) / Do not transpose rhs matrix (0) + export_to_cl_image_rhs - Export rhs matrix to cl_image (1) / Do not export rhs matrix to cl_image (0). Can only be true + with certain combinations of the GEMMParams and other configs. Please refer to CLGEMMReshapeRHSMatrixKernel + for more details Only the following configurations of M0, N0 and K0 are currently supported: @@ -98,14 +164,12 @@ The 2 scripts **benchmark_gemm_examples.sh** and **GemmTuner.py** can be found u An example gemm config file looks like: ``` - 4,4,4,1,1,1 - 4,4,4,3,1,0 + 4,4,4,1,1,1,0 + 4,4,4,3,1,0,1 ... ``` * Strategy **reshaped**: - - A gemm config is a list of 5 positive integers \<m0, n0, k0, v0, h0\> and 3 boolean values interleave_lhs, - interleave_rhs and transpose_rhs, with: + A gemm config is a list of 5 positive integers <m0, n0, k0, v0, h0> and 4 boolean values: m0 - Number of rows processed by the matrix multiplication n0 - Number of columns processed by the matrix multiplication @@ -114,29 +178,31 @@ The 2 scripts **benchmark_gemm_examples.sh** and **GemmTuner.py** can be found u h0 - Number of horizontal blocks of size (k0xn0) stored on the same output row interleave_lhs - Interleave lhs matrix (1) / Do not interleave lhs matrix (0) interleave_rhs - Interleave rhs matrix (1) / Do not interleave rhs matrix (0) - transpose_rhs - Transpose rhs matrix but not lhs matrix (1) / Do not transpose rhs matrix but do transpose - lhs matrix (0) + transpose_rhs - Transpose rhs matrix but not lhs matrix (1) / Do not transpose rhs matrix but do transpose lhs matrix (0) + export_to_cl_image_rhs - Export rhs matrix to cl_image (1) / Do not export rhs matrix to cl_image (0). Can only be true + with certain combinations of the GEMMParams and other configs. 
Please refer to CLGEMMReshapeRHSMatrixKernel
+                            for more details.

-   * If rhs matrix is transposed only the following configurations are currently supported:
+   If rhs matrix is transposed only the following configurations are currently supported:

-     M0 = 2, 3, 4, 5, 6, 7, 8
-     N0 = 2, 3, 4, 8, 16
-     K0 = 2, 3, 4, 8, 16
-     V0 >= 1
-     H0 >= 1
+   M0 = 2, 3, 4, 5, 6, 7, 8
+   N0 = 2, 3, 4, 8, 16
+   K0 = 2, 3, 4, 8, 16
+   V0 >= 1
+   H0 >= 1

-   * If lhs matrix is transposed only the following configurations are currently supported:
+   If lhs matrix is transposed only the following configurations are currently supported:

-     M0 = 2, 3, 4, 8
-     N0 = 2, 3, 4, 8, 16
-     K0 = 2, 3, 4, 8, 16
-     V0 >= 1
-     H0 >= 1
+   M0 = 2, 3, 4, 8
+   N0 = 2, 3, 4, 8, 16
+   K0 = 2, 3, 4, 8, 16
+   V0 >= 1
+   H0 >= 1

    An example gemm config file looks like:
    ```
-   4,4,4,1,3,1,1,1
-   4,4,4,3,3,1,1,0
+   4,4,4,1,3,1,1,1,0
+   4,4,4,3,3,1,1,0,1
    ...
    ```
 * A host machine, plus these on the machine:
@@ -144,45 +210,53 @@ The 2 scripts **benchmark_gemm_examples.sh** and **GemmTuner.py** can be found u
   * python >= 3.6
   * GemmTuner.py script

 ## Usage
-The tuning stage consists of 2 steps:
+The usage of the 2 scripts is as follows:

-1. Run benchmarks:
+1. cl_gemm_benchmark

-   Run the shell script (**benchmark_gemm_examples.sh**) on your **target device**. Note that all the built benchmark
-   examples have to be present on your target device prior to running. The benchmark results will be saved to json
-   files in an output directory.
+   Run the shell script (**cl_gemm_benchmark**) on your **target device**. Note that all the built benchmark
+   examples (build/tests/gemm_tuner/benchmark_cl_gemm*) have to be present on your target device prior to running.
+   The benchmark results will be saved to json files in an output directory.
   ```
-   Usage: benchmark_gemm_examples.sh [-h] -s \<strategy\> -e \<example_binary_dir\> -g \<gemm_shape_file\>
-   -c \<gemm_config_file\> [-o \<out_dir\>]
+   Usage: cl_gemm_benchmark [-h] -s \<strategy\> -e \<example_binary_dir\> -g \<gemm_shape_file\>
+   -c \<gemm_config_file\> [-d \<data_type\>] [-o \<out_dir\>]

   Options:
           -h
-           Print help messages. If a strategy is specified with -s \<strategy\>, then only display messages relevant
-           to that strategy. Otherwise if no strategy is specified, display messages for all available strategies.
+           Print help messages. If a strategy is specified with -s <strategy>, then only display messages relevant to that
+           strategy. Otherwise if no strategy is specified, display messages for all available strategies.

-           -s \<strategy\>
+           -s <strategy>
           Strategy option.
-           Options: native reshaped_rhs_only reshaped.
+           Options: native, reshaped_rhs_only, reshaped.

-           -e \<example_binary_dir\>
+           -e <example_binary_dir>
           Path to directory that holds all example binaries

-           -g \<gemm_shape_file\>
+           -g <gemm_shape_file>
           Path to gemm shape csv file

-           -c \<gemm_config_file\>
+           -c <gemm_config_file>
           Path to gemm config csv file

-           -o \<out_dir\>
+           -d <data_type>
+           Data type option with which to run benchmark examples
+           Default: f32
+           Supported options:
+           Strategy            :    Data Types
+           Native              :    f32
+           Reshaped            :    f16, f32, qasymm8
+           Reshaped RHS Only   :    f16, f32, qasymm8
+
+           -o <out_dir>
           Path to output directory that holds output json files
-           Default: out
+           Default: out
   ```
-2. Run analyser:
+2. GemmTuner.py:

   Run the python script (**GemmTuner.py**) on your **host machine**. You'll need to transfer all the benchmark
   result json files generated from the previous step to your host machine
-   beforehand.
The script will output the best configuration, along with some analysis statistics for each strategy, and - optionally save the parsed benchmark results into csv files (one for each strategy) for further analysis. + beforehand. The script will output the best kernel and gemm configurations for each gemm param in the 4 output json files ``` Usage: GemmTuner.py [-h] -b PATH [-o PATH] [-t TOLERANCE] [-D] @@ -194,8 +268,7 @@ The tuning stage consists of 2 steps: result json files have a file extension of 'gemmtuner_benchmark' -o PATH, --output_dir PATH - Path to directory that holds output csv files. One per - strategy + Path to directory that holds output json files. -t TOLERANCE, --tolerance TOLERANCE For testing if two GEMMConfigs are equivalent in terms of performance. The tolerance is OpenCL timer in @@ -203,31 +276,3 @@ The tuning stage consists of 2 steps: -D, --debug Enable script debugging output ``` - -## Approach - -This section gives a brief description and rationale of the approach adopted by the current version of GEMM Tuner. - -As explained in the Introduction section, the outputs of the tuner are 1 optimal GEMMConfig for each strategy. -This is because we can only integrate 1 GEMMConfig for each strategy in ACL at compile time. In theory, however, the -optimal GEMMConfig also depends on different parameters of GEMM (called GEMM Parameter or GEMMParam, e.g.: the shape -of the operation); thus ideally, for each strategy, the optimal configurations should be a mapping from GEMMParam to -GEMMConfig instead of a single GEMMConfig. - -To address this issue, we ensure the one single optimal GEMMConfig can generalise well to all potential GEMMParams -(or at least the ones that we care about). The approach we adopt involves a preliminary stage where a collection of -common GEMMParams (GEMM shapes from popular networks) are compiled. Then, to reduce the final tuning time, rather -contradictorily, we spend a lot of time searching for near-optimal GEMMConfigs for each GEMMParam first, and then -discard redundant GEMMParams which share similar optimal GEMMConfigs with others. The resultant list of GEMMParams is -called a __GEMMParam search list__, as in these GEMMParams are typical enough to capture the space of GEMMParams that -we care about. - -During this preliminary stage we also produce a list of good GEMMConfigs that can be used to search for the optimal one -in the actual tuning stage. This, again, is to reduce the tuning time, and the resultant list is called a -__GEMMConfig search list__. - -The GEMMParam search list and the GEMMConfig search list are investigated and prepared by the developers; the users of -GEMM tuner need not worry about producing them, but they need to obtain them prior to running the tuner. - -Once these two lists (2 for each strategy, so 6 in total) are obtained, they can be fed to the tuner, to produce the -optimal GEMMConfig(s).
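For a sense of how the tuner's 4 output json files might be consumed downstream, here is a minimal sketch. It assumes
the lookup key matches the `str(GEMMParam)` form the script writes, and the helper name is hypothetical; the field
names mirror those written by `save_to_jsons` in GemmTuner.py.
```
import json
import os

def best_choice_for(heuristics_dir, gemm_param_key):
    # gemm_type_selection.json maps str(GEMMParam) -> name of the best strategy
    with open(os.path.join(heuristics_dir, "gemm_type_selection.json")) as f:
        strategy_name = json.load(f)[gemm_param_key]
    # gemm_config_<strategy>.json maps str(GEMMParam) -> list of good configs,
    # each holding "GEMMConfig" plus the two OpenCL timer fields (as strings)
    config_file = "gemm_config_{}.json".format(strategy_name.lower())
    with open(os.path.join(heuristics_dir, config_file)) as f:
        candidates = json.load(f)[gemm_param_key]
    # Pick the candidate with the smallest total measured time
    best = min(
        candidates,
        key=lambda c: float(c["OpenCL_Timer_ms_reshape"]) + float(c["OpenCL_Timer_ms_kernel"]),
    )
    return strategy_name, best["GEMMConfig"]
```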
\ No newline at end of file diff --git a/examples/gemm_tuner/benchmark_gemm_examples.sh b/examples/gemm_tuner/cl_gemm_benchmark.sh index d6f41cc22a..92fe6b194e 100755 --- a/examples/gemm_tuner/benchmark_gemm_examples.sh +++ b/examples/gemm_tuner/cl_gemm_benchmark.sh @@ -1,4 +1,4 @@ -# Copyright (c) 2019 ARM Limited. +# Copyright (c) 2019-2021 Arm Limited. # # SPDX-License-Identifier: MIT # @@ -31,14 +31,34 @@ CMD=$( basename $0 ) # All supported strategy options ALL_STRATEGY_OPTIONS=("native" "reshaped_rhs_only" "reshaped") +# All supported data type options +ALL_DATA_TYPE_OPTIONS=("f32" "f16" "qasymm8") + # Names of example binary for each strategy EXAMPLE_BIN_NATIVE="benchmark_cl_gemm_native" EXAMPLE_BIN_RESHAPED_RHS_ONLY="benchmark_cl_gemm_reshaped_rhs_only" EXAMPLE_BIN_RESHAPED="benchmark_cl_gemm_reshaped" +EXAMPLE_BIN_RESHAPED_RHS_ONLY_LOWP="benchmark_cl_gemmlowp_reshaped_rhs_only_fused_output_stage_fixedpoint" +EXAMPLE_BIN_RESHAPED_LOWP="benchmark_cl_gemmlowp_reshaped" + +# Default data type +DEFAULT_DATA_TYPE="f32" # Default output directory DEFAULT_OUT_DIR="out" +# Default ID of the first experiment +DEFAULT_ID_EXPERIMENT_START=0 + +# Default total number of experiments +DEFAULT_NUM_EXPERIMENTS="all" + +# Default output file extension +DEFAULT_OUT_EXTENSION="mlgo_benchmark" + +# Default OpenCL tuner mode +DEFAULT_TUNER_MODE="rapid" + # Number of iterations for each benchmark run NUM_ITERATION=5 # Global }}} @@ -56,10 +76,7 @@ NUM_ITERATION=5 function help_gemm_shape_file() { cat >&2 << EOF Gemm shape file: - Gemm shape file is a headerless csv file with fields separated by commas and commas only (there cannot be whitespaces - around each field). - - Note also comments and extraneous empty lines are not permitted. + Gemm shape file is a csv file with fields separated by commas. The optional header and comments are ignored by the parser. A gemm shape is a list of 4 positive integers <M, N, K, B> describing the shapes of the two matrices (LHS and RHS) with: @@ -88,10 +105,7 @@ EOF function help_gemm_config_file_native() { cat >&2 << EOF Gemm config file (Strategy native): - Gemm config file is a headerless csv file with fields separated by commas and commas only (there cannot be whitespaces - around each field). - - Note also comments and extraneous empty lines are not permitted. + Gemm config file is a csv file with fields separated by commas. The optional header and comments are ignored by the parser. A gemm config is a list of 3 positive integers <m0, n0, k0>, with: m0 - Number of rows processed by the matrix multiplication @@ -123,18 +137,18 @@ EOF function help_gemm_config_file_reshaped_rhs_only() { cat >&2 << EOF Gemm config file (Strategy reshaped_rhs_only): - Gemm config file is a headerless csv file with fields separated by commas and commas only (there cannot be whitespaces - around each field). - - Note also comments and extraneous empty lines are not permitted. + Gemm config file is a csv file with fields separated by commas. The optional header and comments are ignored by the parser. 
-  A gemm config is a list of 4 positive integers <m0, n0, k0, h0> and 2 boolean values interleave_rhs and transpose_rhs, with:
+  A gemm config is a list of 4 positive integers <m0, n0, k0, h0> and 3 boolean values:

   m0 - Number of rows processed by the matrix multiplication
   n0 - Number of columns processed by the matrix multiplication
   k0 - Number of partial accumulations performed by the matrix multiplication
   h0 - Number of horizontal blocks of size (k0xn0) stored on the same output row
   interleave_rhs - Interleave rhs matrix (1) / Do not interleave rhs matrix (0)
   transpose_rhs - Transpose rhs matrix (1) / Do not transpose rhs matrix (0)
+  export_to_cl_image_rhs - (Not supported for quantized types) Export rhs matrix to cl_image (1) / Do not export rhs matrix to cl_image (0). Can only be true
+                           with certain combinations of the GEMMParams and other configs. Please refer to CLGEMMReshapeRHSMatrixKernel
+                           for more details

   Only the following configurations of M0, N0 and K0 are currently supported:
   M0 = 1, 2, 3, 4, 5, 6, 7, 8
@@ -143,8 +157,8 @@ Gemm config file (Strategy reshaped_rhs_only):
   H0 >= 1

   An example gemm config file looks like:
-  4,4,4,1,1,1
-  4,4,4,3,1,0
+  4,4,4,1,1,1,0
+  4,4,4,3,1,0,1
   ...

EOF
@@ -162,12 +176,9 @@ EOF
 function help_gemm_config_file_reshaped() {
 cat >&2 << EOF
 Gemm config file (Strategy reshaped):
-  Gemm config file is a headerless csv file with fields separated by commas and commas only (there cannot be whitespaces
-  around each field).
+  Gemm config file is a csv file with fields separated by commas. The optional header and comments are ignored by the parser.

-  Note also comments and extraneous empty lines are not permitted.
-
-  A gemm config is a list of 5 positive integers <m0, n0, k0, v0, h0> and 3 boolean values interleave_lhs, interleave_rhs and transpose_rhs, with:
+  A gemm config is a list of 5 positive integers <m0, n0, k0, v0, h0> and 4 boolean values:
   m0 - Number of rows processed by the matrix multiplication
   n0 - Number of columns processed by the matrix multiplication
   k0 - Number of partial accumulations performed by the matrix multiplication
@@ -176,6 +187,9 @@ Gemm config file (Strategy reshaped):
   interleave_lhs - Interleave lhs matrix (1) / Do not interleave lhs matrix (0)
   interleave_rhs - Interleave rhs matrix (1) / Do not interleave rhs matrix (0)
   transpose_rhs - Transpose rhs matrix but not lhs matrix (1) / Do not transpose rhs matrix but do transpose lhs matrix (0)
+  export_to_cl_image_rhs - (Not supported for quantized types) Export rhs matrix to cl_image (1) / Do not export rhs matrix to cl_image (0). Can only be true
+                           with certain combinations of the GEMMParams and other configs. Please refer to CLGEMMReshapeRHSMatrixKernel
+                           for more details

   If rhs matrix is transposed only the following configurations are currently supported:
   M0 = 2, 3, 4, 5, 6, 7, 8
@@ -192,8 +206,8 @@ Gemm config file (Strategy reshaped):
   H0 >= 1

   An example gemm config file looks like:
-  4,4,4,1,3,1,1,1
-  4,4,4,3,3,1,1,0
+  4,4,4,1,3,1,1,1,0
+  4,4,4,3,3,1,1,0,1
   ...

EOF
@@ -213,7 +227,7 @@ function usage() {
 cat >&2 << EOF
 Run gemm examples of a selected strategy, over provided tunable configurations and gemm shapes. Save the benchmark results to json files in an output directory.
-Usage: ${CMD} [-h] -s <strategy> -e <example_binary_dir> -g <gemm_shape_file> -c <gemm_config_file> [-o <out_dir>] +Usage: ${CMD} [-h] -s <strategy> -e <example_binary_dir> -g <gemm_shape_file> -c <gemm_config_file> [-o <out_dir>] [-d <data_type>] [-i <id_experiment_start>] [-n <num_experiments>] [-t <output_extension>] Options: -h @@ -233,10 +247,35 @@ Options: -c <gemm_config_file> Path to gemm config csv file + -d <data_type> + Data type option with which to run benchmark examples + Default: ${DEFAULT_DATA_TYPE} + Supported options: + Strategy : Data Types + Native : f32 + Reshaped : f32, f16, qasymm8 + Reshaped RHS Only : f32, f16, qasymm8 + -o <out_dir> Path to output directory that holds output json files Default: ${DEFAULT_OUT_DIR} + -i <id_experiment_start> + ID of the first experiment. + Default: ${DEFAULT_ID_EXPERIMENT_START} + + -n <num_experiments> + Total number of experiments to execute in this session. [1-all] + Default: ${DEFAULT_NUM_EXPERIMENTS} + + -t <output_extension> + Output file extension. + Default: ${DEFAULT_OUT_EXTENSION} + + -m <tuner_mode> + OpenCL tuner mode. + Default: ${DEFAULT_TUNER_MODE} + EOF # Print help messages about gemm shapes and various gemm configs $HELP && help_gemm_shape_file @@ -315,10 +354,17 @@ function arr_contains() { # Globals: # OUT_DIR # OUT_EXTENSION +# TUNER_MODE # EXAMPLE_BIN_DIR # NUM_ITERATION # GEMM_CONFIGS_FILE # GEMM_SHAPES_FILE +# STRATEGY_OPTION +# DATA_TYPE +# OUT_DIR +# ID_EXPERIMENT_START +# NUM_EXPERIMENTS + # Arguments: # example_bin Name of the example binary to run # Returns: @@ -326,41 +372,97 @@ function arr_contains() { ####################################### function run() { local example_bin=$1 - echo "Running all configs for ${example_bin}" 1>&2 + echo "Running experiments for ${example_bin}" 1>&2 local example_args - local expr_count=1 + local json_filename + local expr_count=0 + # Total number of experiments available + local num_experiments_total # Total number of experiment runs scheduled for this session - local total_num_experiment - local num_params - local num_configs - num_params=$( wc -l ${GEMM_SHAPES_FILE} | cut -d " " -f 1) - num_configs=$( wc -l ${GEMM_CONFIGS_FILE} | cut -d " " -f 1 ) - (( total_num_experiment=${num_params} * ${num_configs} )) + local num_experiments_session + local id_experiment_start + local id_experiment_end + local array_shapes + local array_configs + local array_shapes_len + local array_configs_len + local array_shapes_idx + local array_configs_idx + local match_expression_shape="^([^,]*,){3}[^,]*$" + local match_expression_config="^(\s*[0-9]+\s*,)+\s*[0-9]\s*$" + local shapes_list_cmd="grep -E "$match_expression_shape" "${GEMM_SHAPES_FILE}"" + local configs_list_cmd="grep -E "$match_expression_config" "${GEMM_CONFIGS_FILE}"" + + # Create array from CSV file + array_shapes=($( $shapes_list_cmd )) + array_configs=($( $configs_list_cmd )) + + # Get array length + array_shapes_len=${#array_shapes[@]} + array_configs_len=${#array_configs[@]} + + # Get the total number of experiments available + (( num_experiments_total=${array_shapes_len} * ${array_configs_len} )) + + # Get the number of experiments to execute in this session + if [ ${NUM_EXPERIMENTS} == ${DEFAULT_NUM_EXPERIMENTS} ] + then + (( num_experiments_session=${array_shapes_len} * ${array_configs_len} )) + else + num_experiments_session=$NUM_EXPERIMENTS + fi + + # Id experiment start + id_experiment_start=${ID_EXPERIMENT_START} + + # Id experiment end + (( id_experiment_end=(${num_experiments_session} + 
${id_experiment_start} - 1) ))
+
+    # Check if the id experiment end is greater than or equal to the total number of experiments available.
+    # If the condition is satisfied, clamp the id experiment end
+    if [ "$id_experiment_end" -ge "$num_experiments_total" ]
+    then
+        echo "Clamping idx experiment end" 1>&2
+        (( id_experiment_end=${num_experiments_total} - 1 ))
+        # Recompute the session size after clamping: experiments [start, end], inclusive
+        (( num_experiments_session=${id_experiment_end} - ${id_experiment_start} + 1 ))
+    fi
+
     # Time elapsed since the beginning in seconds
     local time_elapsed_s
     # Time estimated to finish in seconds
     local time_est_s
-    echo "Running a total number of ${total_num_experiment} experiments" 1>&2
+    echo "Running a total number of ${num_experiments_session} experiments" 1>&2
+    echo "Experiment idx start/end [${id_experiment_start}, ${id_experiment_end}]" 1>&2

-    while read gemm_shape
+    # Run experiments
+    for i in $(seq $id_experiment_start $id_experiment_end);
     do
-        while read gemm_config
-        do
-            echo "Running..." 1>&2
-            example_args="${gemm_shape},${gemm_config}"
-            # Run experiment
-            ${EXAMPLE_BIN_DIR}/${example_bin} --example_args=${example_args} --iterations=${NUM_ITERATION} --json-file=${OUT_DIR}/${expr_count}.${OUT_EXTENSION} --instruments=OPENCL_TIMER_MS
-            # Print progress
-            print_progress ${expr_count} ${total_num_experiment}
-            # Print time statistics
-            time_elapsed_s=$SECONDS
-            echo "Time elapsed since beginning: $(( $time_elapsed_s / 60 ))m $(( $time_elapsed_s % 60 ))s" 1>&2
-            (( time_est_s=(${total_num_experiment} - ${expr_count}) * ${time_elapsed_s} / ${expr_count} ))
-            echo "Time estimated to finish: $(( $time_est_s / 60 ))m $(( $time_est_s % 60 ))s" 1>&2
-            (( expr_count++ ))
-            echo "Done." 1>&2
-        done < "${GEMM_CONFIGS_FILE}"
-    done < "${GEMM_SHAPES_FILE}"
+        # Map the flat experiment id onto a (shape, config) pair, row-major over configs
+        (( array_shapes_idx=${i} / ${array_configs_len} ))
+        (( array_configs_idx=${i} % ${array_configs_len} ))
+
+        gemm_shape=${array_shapes[$array_shapes_idx]}
+        gemm_config=${array_configs[$array_configs_idx]}
+
+        echo "Running shape[$array_shapes_idx]=$gemm_shape with config[$array_configs_idx]=$gemm_config" 1>&2
+
+        example_args="${gemm_shape},${gemm_config},--type=${DATA_TYPE},--tuner-mode=${TUNER_MODE}"
+        json_filename="${STRATEGY_OPTION}_${gemm_shape}_${gemm_config}_${DATA_TYPE}"
+        # Replace "," with "_"
+        json_filename=${json_filename//,/_}
+
+        # Run experiment
+        ${EXAMPLE_BIN_DIR}/${example_bin} --example_args=${example_args} --iterations=${NUM_ITERATION} --json-file=${OUT_DIR}/${json_filename}.${OUT_EXTENSION} --instruments=OPENCL_TIMER_MS
+        # Print progress
+        (( expr_count++ ))
+        print_progress ${expr_count} ${num_experiments_session}
+        # Print time statistics
+        time_elapsed_s=$SECONDS
+        echo "Time elapsed since beginning: $(( $time_elapsed_s / 60 ))m $(( $time_elapsed_s % 60 ))s" 1>&2
+        (( time_est_s=(${num_experiments_session} - ${expr_count}) * ${time_elapsed_s} / ${expr_count} ))
+        echo "Time estimated to finish: $(( $time_est_s / 60 ))m $(( $time_est_s % 60 ))s" 1>&2
+        echo "Done."
1>&2 + done + echo "Finished running all configs for ${example_bin}" 1>&2 echo "All results saved to ${OUT_DIR}" 1>&2 } @@ -404,23 +506,37 @@ EXAMPLE_BIN_DIR="" GEMM_SHAPES_FILE="" # Path to gemm configs file GEMM_CONFIGS_FILE="" +# Strategy option STRATEGY_OPTION="" +# Data type to use +DATA_TYPE=${DEFAULT_DATA_TYPE} # Path to output directory OUT_DIR=${DEFAULT_OUT_DIR} +# ID of the first experiment +ID_EXPERIMENT_START=${DEFAULT_ID_EXPERIMENT_START} +# Total number of experiments to execute in this session +NUM_EXPERIMENTS=${DEFAULT_NUM_EXPERIMENTS} # Output benchmark result file extension -OUT_EXTENSION="gemmtuner_benchmark" +OUT_EXTENSION=${DEFAULT_OUT_EXTENSION} +# OpenCL tuner mode +TUNER_MODE=${DEFAULT_TUNER_MODE} # Toggle help HELP=false # Obtain options -while getopts "hs:e:g:c:o:" opt; do +while getopts "hs:e:g:c:d:o:i:n:t:m:" opt; do case "$opt" in h) HELP=true ;; s) STRATEGY_OPTION=$(to_lower "${OPTARG}");; e) EXAMPLE_BIN_DIR="${OPTARG}";; g) GEMM_SHAPES_FILE="${OPTARG}";; c) GEMM_CONFIGS_FILE="${OPTARG}";; + d) DATA_TYPE=$(to_lower "${OPTARG}");; o) OUT_DIR="${OPTARG}";; + i) ID_EXPERIMENT_START="${OPTARG}";; + n) NUM_EXPERIMENTS="${OPTARG}";; + t) OUT_EXTENSION="${OPTARG}";; + m) TUNER_MODE="${OPTARG}";; esac done shift $((OPTIND - 1)) @@ -454,17 +570,27 @@ $HELP && arr_contains "${STRATEGY_OPTION}" "${ALL_STRATEGY_OPTIONS[@]}" || error_msg "Does not support strategy ${STRATEGY_OPTION}" +# Verify data type option is valid +arr_contains "${DATA_TYPE}" "${ALL_DATA_TYPE_OPTIONS[@]}" || + error_msg "Does not support data type ${DATA_TYPE}" + # Make sure existing benchmark outputs are not overwritten [ ! -d "${OUT_DIR}" ] || error_msg "Output directory ${OUT_DIR} already exists!" # Make output directory -mkdir ${OUT_DIR} +echo "Making output directory ${OUT_DIR}" 1>&2 +mkdir -p ${OUT_DIR} || error_msg "Failed to make output directory ${OUT_DIR}" # Run selected strategy with all configurations # Restart the built-in timer SECONDS=0 -[ "${STRATEGY_OPTION}" == "native" ] && run $EXAMPLE_BIN_NATIVE -[ "${STRATEGY_OPTION}" == "reshaped_rhs_only" ] && run $EXAMPLE_BIN_RESHAPED_RHS_ONLY -[ "${STRATEGY_OPTION}" == "reshaped" ] && run $EXAMPLE_BIN_RESHAPED +if [ "$DATA_TYPE" == "qasymm8" ]; then + [ "${STRATEGY_OPTION}" == "reshaped_rhs_only" ] && run $EXAMPLE_BIN_RESHAPED_RHS_ONLY_LOWP + [ "${STRATEGY_OPTION}" == "reshaped" ] && run $EXAMPLE_BIN_RESHAPED_LOWP +else + [ "${STRATEGY_OPTION}" == "native" ] && run $EXAMPLE_BIN_NATIVE + [ "${STRATEGY_OPTION}" == "reshaped_rhs_only" ] && run $EXAMPLE_BIN_RESHAPED_RHS_ONLY + [ "${STRATEGY_OPTION}" == "reshaped" ] && run $EXAMPLE_BIN_RESHAPED +fi # Main: Main script }}} diff --git a/examples/gemm_tuner/cl_gemm_native.cpp b/examples/gemm_tuner/cl_gemm_native.cpp index 0cacd82087..7daa0b07d3 100644 --- a/examples/gemm_tuner/cl_gemm_native.cpp +++ b/examples/gemm_tuner/cl_gemm_native.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -25,23 +25,24 @@ #error "This example needs to be built with -DARM_COMPUTE_CL" #endif /* ARM_COMPUTE_CL */ -#include "CommonGemmExampleOptions.h" -#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/CL/CLFunctions.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/CL/CLTuner.h" + +#include "src/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.h" #include "tests/CL/Helper.h" -#include "utils/Utils.h" #include "utils/command_line/CommandLineOptions.h" #include "utils/command_line/CommandLineParser.h" +#include "utils/Utils.h" +#include "CommonGemmExampleOptions.h" #include <cstdlib> using namespace arm_compute; +using namespace arm_compute::opencl::kernels; using namespace utils; using namespace arm_compute::misc::shape_calculator; using namespace gemm_tuner; @@ -51,9 +52,9 @@ namespace /** Structure holding all tunable gemm configs specific to this example/strategy */ struct GemmConfigs { - size_t m0{ 4 }; /**< Number of rows processed by the matrix multiplication */ - size_t n0{ 4 }; /**< Number of columns processed by the matrix multiplication */ - size_t k0{ 4 }; /**< Number of partial accumulations performed by the matrix multiplication */ + size_t m0{4}; /**< Number of rows processed by the matrix multiplication */ + size_t n0{4}; /**< Number of columns processed by the matrix multiplication */ + size_t k0{4}; /**< Number of partial accumulations performed by the matrix multiplication */ }; /** Formatted output of the GemmConfigs type @@ -123,8 +124,8 @@ GemmConfigs consume_gemm_configs(const GemmConfigOptions &options) } } // namespace -// Create function for CLGEMMMatrixMultiplyNativeKernel -using CLGEMMMatrixMultiplyNative = test::CLSynthetizeFunction<CLGEMMMatrixMultiplyNativeKernel>; +// Create function for ClGemmMatrixMultiplyNativeKernel +using CLGEMMMatrixMultiplyNative = test::CLSynthetizeOperator<ClGemmMatrixMultiplyNativeKernel>; class CLGEMMMatrixMultiplyNativeExample : public Example { @@ -132,10 +133,9 @@ public: bool do_setup(int argc, char **argv) override { // Default parameters - const DataType data_type = DataType::F32; - const float alpha = 1.0f; - const float beta = 0.0f; - const ActivationLayerInfo act_info = ActivationLayerInfo(); + const float alpha = 1.0f; + const float beta = 0.0f; + const ActivationLayerInfo act_info = ActivationLayerInfo(); CommonGemmExampleParams params; GemmConfigs configs; @@ -146,13 +146,13 @@ public: // Parse command line options parser.parse(argc, argv); - if(param_options.help->is_set() && param_options.help->value()) + if (param_options.help->is_set() && param_options.help->value()) { // Print help message parser.print_help(argv[0]); return false; } - if(!parser.validate()) + if (!parser.validate()) { // Invalid arguments. Use default parameters and configs std::cerr << "Invalid arguments." 
<< std::endl; @@ -167,16 +167,18 @@ public: } // Print gemm parameters and configurations - std::cerr << "Gemm parameters:" << std::endl; - std::cerr << params << std::endl; - std::cerr << "Gemm configurations:" << std::endl; - std::cerr << configs << std::endl; + std::cout << "Gemm parameters:" << std::endl; + std::cout << params << std::endl; + std::cout << "Gemm configurations:" << std::endl; + std::cout << configs << std::endl; + + tuner.set_tuner_mode(params.tuner_mode); CLScheduler::get().default_init(&tuner); - lhs.allocator()->init(TensorInfo(TensorShape(params.K, params.M, params.B), 1, data_type)); - rhs.allocator()->init(TensorInfo(TensorShape(params.N, params.K, params.B), 1, data_type)); - bias.allocator()->init(TensorInfo(TensorShape(params.N, 1, params.B), 1, data_type)); + lhs.allocator()->init(TensorInfo(TensorShape(params.K, params.M, params.B), 1, params.data_type)); + rhs.allocator()->init(TensorInfo(TensorShape(params.N, params.K, params.B), 1, params.data_type)); + bias.allocator()->init(TensorInfo(TensorShape(params.N, 1, params.B), 1, params.data_type)); GEMMLHSMatrixInfo lhs_info; lhs_info.m0 = configs.m0; @@ -195,8 +197,20 @@ public: kernel_info.broadcast_bias = true; kernel_info.activation_info = act_info; + // Validate argments + Status status{}; + status = gemm.validate(lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, + kernel_info); + if (!status) + { + // Unsupported arguments + std::cerr << "Unsupported arguments." << std::endl; + std::cerr << "Check documentation for supported/unsupported combinations" << std::endl; + return false; + } + // Configure function - gemm.configure(&lhs, &rhs, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info); + gemm.configure(lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info); // Allocate tensors lhs.allocator()->allocate(); @@ -209,7 +223,8 @@ public: void do_run() override { // Execute the function - gemm.run(); + ITensorPack gemm_pack({{ACL_SRC_0, &lhs}, {ACL_SRC_1, &rhs}, {ACL_SRC_2, &bias}, {ACL_DST, &dst}}); + gemm.run(gemm_pack); // Make sure all the OpenCL jobs are done executing: CLScheduler::get().sync(); diff --git a/examples/gemm_tuner/cl_gemm_reshaped.cpp b/examples/gemm_tuner/cl_gemm_reshaped.cpp index e579ed762c..75f3539cb9 100644 --- a/examples/gemm_tuner/cl_gemm_reshaped.cpp +++ b/examples/gemm_tuner/cl_gemm_reshaped.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2021, 2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -25,24 +25,26 @@ #error "This example needs to be built with -DARM_COMPUTE_CL" #endif /* ARM_COMPUTE_CL */ -#include "CommonGemmExampleOptions.h" -#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/CL/CLFunctions.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/CL/CLTuner.h" + +#include "examples/gemm_tuner/CommonGemmExampleOptions.h" +#include "examples/gemm_tuner/GemmTunerHelpers.h" +#include "src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.h" +#include "src/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h" #include "tests/CL/Helper.h" -#include "utils/Utils.h" #include "utils/command_line/CommandLineOptions.h" #include "utils/command_line/CommandLineParser.h" +#include "utils/Utils.h" #include <cstdlib> using namespace arm_compute; +using namespace arm_compute::opencl::kernels; using namespace utils; using namespace arm_compute::misc::shape_calculator; using namespace gemm_tuner; @@ -52,15 +54,16 @@ namespace /** Structure holding all tunable gemm configs specific to this example/strategy */ struct GemmConfigs { - size_t m0{ 4 }; /**< Number of rows processed by the matrix multiplication */ - size_t n0{ 4 }; /**< Number of columns processed by the matrix multiplication */ - size_t k0{ 4 }; /**< Number of partial accumulations performed by the matrix multiplication */ - size_t v0{ 1 }; /**< Number of vertical blocks of size (m0xk0) stored on the same output row */ - size_t h0{ 1 }; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */ - bool interleave_lhs{ true }; /**< Interleave lhs matrix */ - bool transpose_lhs{ true }; /**< Transpose lhs matrix. */ - bool interleave_rhs{ true }; /**< Interleave rhs matrix */ - bool transpose_rhs{ true }; /**< Transpose rhs matrix. */ + size_t m0{4}; /**< Number of rows processed by the matrix multiplication */ + size_t n0{4}; /**< Number of columns processed by the matrix multiplication */ + size_t k0{4}; /**< Number of partial accumulations performed by the matrix multiplication */ + size_t v0{1}; /**< Number of vertical blocks of size (m0xk0) stored on the same output row */ + size_t h0{1}; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */ + bool interleave_lhs{true}; /**< Interleave lhs matrix */ + bool transpose_lhs{true}; /**< Transpose lhs matrix. */ + bool interleave_rhs{true}; /**< Interleave rhs matrix */ + bool transpose_rhs{true}; /**< Transpose rhs matrix. */ + bool export_to_cl_image_rhs{true}; /**< Export rhs matrix to cl_image. */ }; /** Formatted output of the GemmConfigs type @@ -84,6 +87,7 @@ struct GemmConfigs os << "transpose_lhs : " << (configs.transpose_lhs ? true_str : false_str) << std::endl; os << "interleave_rhs : " << (configs.interleave_rhs ? true_str : false_str) << std::endl; os << "transpose_rhs : " << (configs.transpose_rhs ? true_str : false_str) << std::endl; + os << "export_to_cl_image_rhs : " << (configs.export_to_cl_image_rhs ? 
true_str : false_str) << std::endl; return os; } @@ -103,7 +107,8 @@ public: h0(parser.add_positional_option<SimpleOption<size_t>>("h0", 1)), interleave_lhs(parser.add_positional_option<SimpleOption<size_t>>("interleave_lhs", 1)), interleave_rhs(parser.add_positional_option<SimpleOption<size_t>>("interleave_rhs", 1)), - transpose_rhs(parser.add_positional_option<SimpleOption<size_t>>("transpose_rhs", 1)) + transpose_rhs(parser.add_positional_option<SimpleOption<size_t>>("transpose_rhs", 1)), + export_to_cl_image_rhs(parser.add_positional_option<SimpleOption<size_t>>("export_to_cl_image_rhs", 1)) { m0->set_help("Number of rows processed by the matrix multiplication"); n0->set_help("Number of columns processed by the matrix multiplication"); @@ -115,7 +120,10 @@ public: // FIXME: Currently we only support 2 variants of the gemm reshaped kernels in which transpose_lhs and // transpose_rhs are the opposites of each other. In the future we may extend the kernels to include the other // 2 variants (both transposed and none transposed) - transpose_rhs->set_help("Transpose rhs matrix but not lhs matrix (1) / Do not transpose rhs matrix but do transpose lhs matrix (0)"); + transpose_rhs->set_help("Transpose rhs matrix but not lhs matrix (1) / Do not transpose rhs matrix but do " + "transpose lhs matrix (0)"); + export_to_cl_image_rhs->set_help( + "Export rhs matrix to cl_image (1) / Do not export rhs matrix to cl_image (0)"); } /** Prevent instances of this class from being copied (As this class contains pointers) */ GemmConfigOptions(const GemmConfigOptions &) = delete; @@ -128,17 +136,19 @@ public: /** Default destructor */ ~GemmConfigOptions() = default; - SimpleOption<size_t> *m0; /**< Number of rows processed by the matrix multiplication option */ - SimpleOption<size_t> *n0; /**< Number of columns processed by the matrix multiplication option */ - SimpleOption<size_t> *k0; /**< Number of partial accumulations performed by the matrix multiplication option */ - SimpleOption<size_t> *v0; /**< Number of vertical blocks of size (m0xk0) stored on the same output row option */ - SimpleOption<size_t> *h0; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row option */ + SimpleOption<size_t> *m0; /**< Number of rows processed by the matrix multiplication option */ + SimpleOption<size_t> *n0; /**< Number of columns processed by the matrix multiplication option */ + SimpleOption<size_t> *k0; /**< Number of partial accumulations performed by the matrix multiplication option */ + SimpleOption<size_t> *v0; /**< Number of vertical blocks of size (m0xk0) stored on the same output row option */ + SimpleOption<size_t> *h0; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row option */ SimpleOption<size_t> *interleave_lhs; /**< Interleave lhs matrix option (1 enable; 0 disable) */ SimpleOption<size_t> *interleave_rhs; /**< Interleave rhs matrix option (1 enable; 0 disable) */ // FIXME: Currently we only support 2 variants of the gemm reshaped kernels in which transpose_lhs and // transpose_rhs are the opposites of each other. In the future we may extend the kernels to include the other // 2 variants (both transposed and none transposed) - SimpleOption<size_t> *transpose_rhs; /**< Transpose rhs matrix option (1 enable; 0 disable). Also set the lhs matrix transpose option to the opposite. */ + SimpleOption<size_t> * + transpose_rhs; /**< Transpose rhs matrix option (1 enable; 0 disable). Also set the lhs matrix transpose option to the opposite. 
*/ + SimpleOption<size_t> *export_to_cl_image_rhs; /**< Export rhs matrix to cl_image.*/ }; /** Consumes the gemm configuration options and creates a structure containing all information @@ -159,17 +169,19 @@ GemmConfigs consume_gemm_configs(const GemmConfigOptions &options) // FIXME: Currently we only support 2 variants of the gemm reshaped kernels in which transpose_lhs and // transpose_rhs are the opposites of each other. In the future we may extend the kernels to include the other // 2 variants (both transposed and none transposed) - configs.transpose_lhs = options.transpose_rhs->value() == 0; - configs.interleave_rhs = options.interleave_rhs->value() != 0; - configs.transpose_rhs = options.transpose_rhs->value() != 0; + configs.transpose_lhs = options.transpose_rhs->value() == 0; + configs.interleave_rhs = options.interleave_rhs->value() != 0; + configs.transpose_rhs = options.transpose_rhs->value() != 0; + configs.export_to_cl_image_rhs = options.export_to_cl_image_rhs->value() != 0; return configs; } } // namespace -// Create function for CLGEMMReshapeLHSMatrixKernel -using CLGEMMReshapeLHSMatrix = test::CLSynthetizeFunction<CLGEMMReshapeLHSMatrixKernel>; -// Create function for CLGEMMMatrixMultiplyReshapedKernel -using CLGEMMMatrixMultiplyReshaped = test::CLSynthetizeFunction<CLGEMMMatrixMultiplyReshapedKernel>; + +// Create function for ClGemmReshapeLhsMatrixKernel +using CLGEMMReshapeLHSMatrix = test::CLSynthetizeOperator<ClGemmReshapeLhsMatrixKernel>; +// Create function for ClGemmMatrixMultiplyReshapedKernel +using CLGEMMMatrixMultiplyReshaped = test::CLSynthetizeOperator<ClGemmMatrixMultiplyReshapedKernel>; class CLGEMMMatrixMultiplyReshapedExample : public Example { @@ -177,10 +189,9 @@ public: bool do_setup(int argc, char **argv) override { // Default parameters - const DataType data_type = DataType::F32; - const float alpha = 1.0f; - const float beta = 0.0f; - const ActivationLayerInfo act_info = ActivationLayerInfo(); + const float alpha = 1.0f; + const float beta = 0.0f; + const ActivationLayerInfo act_info = ActivationLayerInfo(); CommonGemmExampleParams params; GemmConfigs configs; @@ -191,13 +202,13 @@ public: // Parse command line options parser.parse(argc, argv); - if(param_options.help->is_set() && param_options.help->value()) + if (param_options.help->is_set() && param_options.help->value()) { // Print help message parser.print_help(argv[0]); return false; } - if(!parser.validate()) + if (!parser.validate()) { // Invalid arguments. Use default parameters and configs std::cerr << "Invalid arguments." 
<< std::endl; @@ -212,16 +223,18 @@ public: } // Print gemm parameters and configurations - std::cerr << "Gemm parameters:" << std::endl; - std::cerr << params << std::endl; - std::cerr << "Gemm configurations:" << std::endl; - std::cerr << configs << std::endl; + std::cout << "Gemm parameters:" << std::endl; + std::cout << params << std::endl; + std::cout << "Gemm configurations:" << std::endl; + std::cout << configs << std::endl; + + tuner.set_tuner_mode(params.tuner_mode); CLScheduler::get().default_init(&tuner); - lhs.allocator()->init(TensorInfo(TensorShape(params.K, params.M, params.B), 1, data_type)); - rhs.allocator()->init(TensorInfo(TensorShape(params.N, params.K, params.B), 1, data_type)); - bias.allocator()->init(TensorInfo(TensorShape(params.N, 1, params.B), 1, data_type)); + lhs.allocator()->init(TensorInfo(TensorShape(params.K, params.M, params.B), 1, params.data_type)); + rhs.allocator()->init(TensorInfo(TensorShape(params.N, params.K, params.B), 1, params.data_type)); + bias.allocator()->init(TensorInfo(TensorShape(params.N, 1, params.B), 1, params.data_type)); GEMMLHSMatrixInfo lhs_info; lhs_info.m0 = configs.m0; @@ -231,11 +244,12 @@ public: lhs_info.transpose = configs.transpose_lhs; GEMMRHSMatrixInfo rhs_info; - rhs_info.n0 = configs.n0; - rhs_info.k0 = configs.k0; - rhs_info.h0 = configs.h0; - rhs_info.interleave = configs.interleave_rhs; - rhs_info.transpose = configs.transpose_rhs; + rhs_info.n0 = configs.n0; + rhs_info.k0 = configs.k0; + rhs_info.h0 = configs.h0; + rhs_info.interleave = configs.interleave_rhs; + rhs_info.transpose = configs.transpose_rhs; + rhs_info.export_to_cl_image = configs.export_to_cl_image_rhs; GEMMKernelInfo kernel_info; kernel_info.m = params.M; @@ -246,17 +260,55 @@ public: kernel_info.broadcast_bias = true; kernel_info.activation_info = act_info; + if (rhs_info.h0 == 0) + { + rhs_info.h0 = std::max(kernel_info.n / rhs_info.n0, 1U); + } + // Initialise lhs_reshaped tensor info - auto_init_if_empty(*lhs_reshaped.info(), lhs.info()->clone()->set_tensor_shape(compute_lhs_reshaped_shape(*lhs.info(), lhs_info))); + lhs_reshaped.allocator()->init( + TensorInfo(compute_lhs_reshaped_shape(*lhs.info(), lhs_info), 1, params.data_type)); // Initialise rhs_reshaped tensor info - auto_init_if_empty(*rhs_reshaped.info(), rhs.info()->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*rhs.info(), rhs_info))); + rhs_reshaped.allocator()->init( + TensorInfo(compute_rhs_reshaped_shape(*rhs.info(), rhs_info), 1, params.data_type)); + + if (rhs_info.export_to_cl_image) + { + if (!examples::gemm_tuner_helpers::update_padding_for_cl_image(rhs_reshaped.info())) + { + std::cerr << "cl_image is not supported on the device, disable export_to_cl_image" << std::endl; + return false; + } + } + + // Validate arguments + Status status{}; + status = reshape_lhs.validate(lhs.info(), lhs_reshaped.info(), lhs_info, kernel_info.reinterpret_input_as_3d); + if (!status) + { + // Unsupported arguments + std::cerr << "Unsupported arguments." << std::endl; + std::cerr << "Check documentation for supported/unsupported combinations" << std::endl; + return false; + } + + status = gemm.validate(lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, + rhs_info, kernel_info); + if (!status) + { + // Unsupported arguments + std::cerr << "Unsupported arguments."
<< std::endl; + std::cerr << "Check documentation for supported/unsupported combinations" << std::endl; + return false; + } // Configure reshape lhs function - reshape_lhs.configure(&lhs, &lhs_reshaped, lhs_info); + reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info); // Configure function - gemm.configure(&lhs_reshaped, &rhs_reshaped, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info); + gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, + rhs_info, kernel_info); // Allocate tensors lhs.allocator()->allocate(); @@ -270,9 +322,13 @@ public: } void do_run() override { - // Execute the function - reshape_lhs.run(); - gemm.run(); + // Execute the functions + ITensorPack reshape_lhs_pack({{ACL_SRC, &lhs}, {ACL_DST, &lhs_reshaped}}); + reshape_lhs.run(reshape_lhs_pack); + + ITensorPack gemm_pack( + {{ACL_SRC_0, &lhs_reshaped}, {ACL_SRC_1, &rhs_reshaped}, {ACL_SRC_2, &bias}, {ACL_DST, &dst}}); + gemm.run(gemm_pack); // Make sure all the OpenCL jobs are done executing: CLScheduler::get().sync(); @@ -297,7 +353,7 @@ private: /** Main program for gemm reshaped test * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] M, [optional] N, [optional] K, [optional] B, [optional] m0, [optional] n0, [optional] k0, [optional] v0, [optional] h0, [optional] interleave_lhs, [optional] interleave_rhs, [optional] transpose_rhs ) + * @param[in] argv Arguments ( [optional] M, [optional] N, [optional] K, [optional] B, [optional] m0, [optional] n0, [optional] k0, [optional] v0, [optional] h0, [optional] interleave_lhs, [optional] interleave_rhs, [optional] transpose_rhs, [optional] export_to_cl_image ) */ int main(int argc, char **argv) { diff --git a/examples/gemm_tuner/cl_gemm_reshaped_rhs_only.cpp b/examples/gemm_tuner/cl_gemm_reshaped_rhs_only.cpp index 0d161aab2d..cfea2c9bac 100644 --- a/examples/gemm_tuner/cl_gemm_reshaped_rhs_only.cpp +++ b/examples/gemm_tuner/cl_gemm_reshaped_rhs_only.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2021, 2023 Arm Limited.
* * SPDX-License-Identifier: MIT * @@ -25,23 +25,25 @@ #error "This example needs to be built with -DARM_COMPUTE_CL" #endif /* ARM_COMPUTE_CL */ -#include "CommonGemmExampleOptions.h" -#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/CL/CLFunctions.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/CL/CLTuner.h" + +#include "src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.h" #include "tests/CL/Helper.h" -#include "utils/Utils.h" #include "utils/command_line/CommandLineOptions.h" #include "utils/command_line/CommandLineParser.h" +#include "utils/Utils.h" +#include "CommonGemmExampleOptions.h" +#include "GemmTunerHelpers.h" #include <cstdlib> using namespace arm_compute; +using namespace arm_compute::opencl::kernels; using namespace utils; using namespace arm_compute::misc::shape_calculator; using namespace gemm_tuner; @@ -51,12 +53,13 @@ namespace /** Structure holding all tunable gemm configs specific to this example/strategy */ struct GemmConfigs { - size_t m0{ 4 }; /**< Number of rows processed by the matrix multiplication */ - size_t n0{ 4 }; /**< Number of columns processed by the matrix multiplication */ - size_t k0{ 4 }; /**< Number of partial accumulations performed by the matrix multiplication */ - size_t h0{ 1 }; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */ - bool interleave_rhs{ true }; /**< Interleave rhs matrix */ - bool transpose_rhs{ true }; /**< Transpose rhs matrix */ + size_t m0{4}; /**< Number of rows processed by the matrix multiplication */ + size_t n0{4}; /**< Number of columns processed by the matrix multiplication */ + size_t k0{4}; /**< Number of partial accumulations performed by the matrix multiplication */ + size_t h0{1}; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */ + bool interleave_rhs{true}; /**< Interleave rhs matrix */ + bool transpose_rhs{true}; /**< Transpose rhs matrix */ + bool export_to_cl_image_rhs{true}; /**< Export rhs matrix to cl_image.*/ }; /** Formatted output of the GemmConfigs type @@ -77,6 +80,7 @@ struct GemmConfigs os << "h0 : " << configs.h0 << std::endl; os << "interleave_rhs : " << (configs.interleave_rhs ? true_str : false_str) << std::endl; os << "transpose_rhs : " << (configs.transpose_rhs ? true_str : false_str) << std::endl; + os << "export_to_cl_image_rhs : " << (configs.export_to_cl_image_rhs ? 
true_str : false_str) << std::endl; return os; } @@ -94,7 +98,8 @@ public: k0(parser.add_positional_option<SimpleOption<size_t>>("k0", 4)), h0(parser.add_positional_option<SimpleOption<size_t>>("h0", 1)), interleave_rhs(parser.add_positional_option<SimpleOption<size_t>>("interleave_rhs", 1)), - transpose_rhs(parser.add_positional_option<SimpleOption<size_t>>("transpose_rhs", 1)) + transpose_rhs(parser.add_positional_option<SimpleOption<size_t>>("transpose_rhs", 1)), + export_to_cl_image_rhs(parser.add_positional_option<SimpleOption<size_t>>("export_to_cl_image_rhs", 1)) { m0->set_help("Number of rows processed by the matrix multiplication"); n0->set_help("Number of columns processed by the matrix multiplication"); @@ -102,6 +107,8 @@ public: h0->set_help("Number of horizontal blocks of size (k0xn0) stored on the same output row"); interleave_rhs->set_help("Interleave rhs matrix (1) / Do not interleave rhs matrix (0)"); transpose_rhs->set_help("Transpose rhs matrix (1) / Do not transpose rhs matrix (0)"); + export_to_cl_image_rhs->set_help( + "Export rhs matrix to cl_image (1) / Do not export rhs matrix to cl_image (0)"); } /** Prevent instances of this class from being copied (As this class contains pointers) */ GemmConfigOptions(const GemmConfigOptions &) = delete; @@ -114,12 +121,13 @@ public: /** Default destructor */ ~GemmConfigOptions() = default; - SimpleOption<size_t> *m0; /**< Number of rows processed by the matrix multiplication option */ - SimpleOption<size_t> *n0; /**< Number of columns processed by the matrix multiplication option */ - SimpleOption<size_t> *k0; /**< Number of partial accumulations performed by the matrix multiplication option */ - SimpleOption<size_t> *h0; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row option */ - SimpleOption<size_t> *interleave_rhs; /**< Interleave rhs matrix option (1 enable; 0 disable) */ - SimpleOption<size_t> *transpose_rhs; /**< Transpose rhs matrix option (1 enable; 0 disable) */ + SimpleOption<size_t> *m0; /**< Number of rows processed by the matrix multiplication option */ + SimpleOption<size_t> *n0; /**< Number of columns processed by the matrix multiplication option */ + SimpleOption<size_t> *k0; /**< Number of partial accumulations performed by the matrix multiplication option */ + SimpleOption<size_t> *h0; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row option */ + SimpleOption<size_t> *interleave_rhs; /**< Interleave rhs matrix option (1 enable; 0 disable) */ + SimpleOption<size_t> *transpose_rhs; /**< Transpose rhs matrix option (1 enable; 0 disable) */ + SimpleOption<size_t> *export_to_cl_image_rhs; /**< Export rhs matrix to cl_image.*/ }; /** Consumes the gemm configuration options and creates a structure containing all information @@ -131,18 +139,19 @@ public: GemmConfigs consume_gemm_configs(const GemmConfigOptions &options) { GemmConfigs configs; - configs.m0 = options.m0->value(); - configs.n0 = options.n0->value(); - configs.k0 = options.k0->value(); - configs.h0 = options.h0->value(); - configs.interleave_rhs = options.interleave_rhs->value() != 0; - configs.transpose_rhs = options.transpose_rhs->value() != 0; + configs.m0 = options.m0->value(); + configs.n0 = options.n0->value(); + configs.k0 = options.k0->value(); + configs.h0 = options.h0->value(); + configs.interleave_rhs = options.interleave_rhs->value() != 0; + configs.transpose_rhs = options.transpose_rhs->value() != 0; + configs.export_to_cl_image_rhs = options.export_to_cl_image_rhs->value() != 0; 
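// Background on export_to_cl_image_rhs, assuming the usual cl_khr_image2d_from_buffer rules
// (this note and the arithmetic are illustrative, not part of the library): reading the
// reshaped RHS through the texture pipeline requires a device that can alias a buffer as a
// 2D image, a floating-point data type, typically n0 in {4, 8, 16} so one texel holds a
// whole block row, and a row pitch that is a multiple of the device's image pitch alignment.
// Padding the pitch is what GemmTunerHelpers' update_padding_for_cl_image() arranges before
// the kernels are validated, roughly:
//     padded_pitch = ((row_pitch + align - 1) / align) * align;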
return configs; } } // namespace -// Create function for CLGEMMMatrixMultiplyReshapedOnlyRHSKernel -using CLGEMMMatrixMultiplyReshapedOnlyRHS = test::CLSynthetizeFunction<CLGEMMMatrixMultiplyReshapedOnlyRHSKernel>; +// Create function for ClGemmMatrixMultiplyReshapedOnlyRhsKernel +using CLGEMMMatrixMultiplyReshapedOnlyRHS = test::CLSynthetizeOperator<ClGemmMatrixMultiplyReshapedOnlyRhsKernel>; class CLGEMMMatrixMultiplyReshapedOnlyRHSExample : public Example { @@ -150,10 +159,9 @@ public: bool do_setup(int argc, char **argv) override { // Default parameters - const DataType data_type = DataType::F32; - const float alpha = 1.0f; - const float beta = 0.0f; - const ActivationLayerInfo act_info = ActivationLayerInfo(); + const float alpha = 1.0f; + const float beta = 0.0f; + const ActivationLayerInfo act_info = ActivationLayerInfo(); CommonGemmExampleParams params; GemmConfigs configs; @@ -164,13 +172,13 @@ public: // Parse command line options parser.parse(argc, argv); - if(param_options.help->is_set() && param_options.help->value()) + if (param_options.help->is_set() && param_options.help->value()) { // Print help message parser.print_help(argv[0]); return false; } - if(!parser.validate()) + if (!parser.validate()) { // Invalid arguments. Use default parameters and configs std::cerr << "Invalid arguments." << std::endl; @@ -185,27 +193,30 @@ public: } // Print gemm parameters and configurations - std::cerr << "Gemm parameters:" << std::endl; - std::cerr << params << std::endl; - std::cerr << "Gemm configurations:" << std::endl; - std::cerr << configs << std::endl; + std::cout << "Gemm parameters:" << std::endl; + std::cout << params << std::endl; + std::cout << "Gemm configurations:" << std::endl; + std::cout << configs << std::endl; + + tuner.set_tuner_mode(params.tuner_mode); CLScheduler::get().default_init(&tuner); - lhs.allocator()->init(TensorInfo(TensorShape(params.K, params.M, params.B), 1, data_type)); - rhs.allocator()->init(TensorInfo(TensorShape(params.N, params.K, params.B), 1, data_type)); - bias.allocator()->init(TensorInfo(TensorShape(params.N, 1, params.B), 1, data_type)); + lhs.allocator()->init(TensorInfo(TensorShape(params.K, params.M, params.B), 1, params.data_type)); + rhs.allocator()->init(TensorInfo(TensorShape(params.N, params.K, params.B), 1, params.data_type)); + bias.allocator()->init(TensorInfo(TensorShape(params.N, 1, params.B), 1, params.data_type)); GEMMLHSMatrixInfo lhs_info; lhs_info.m0 = configs.m0; lhs_info.k0 = configs.k0; GEMMRHSMatrixInfo rhs_info; - rhs_info.n0 = configs.n0; - rhs_info.k0 = configs.k0; - rhs_info.h0 = configs.h0; - rhs_info.interleave = configs.interleave_rhs; - rhs_info.transpose = configs.transpose_rhs; + rhs_info.n0 = configs.n0; + rhs_info.k0 = configs.k0; + rhs_info.h0 = configs.h0; + rhs_info.interleave = configs.interleave_rhs; + rhs_info.transpose = configs.transpose_rhs; + rhs_info.export_to_cl_image = configs.export_to_cl_image_rhs; GEMMKernelInfo kernel_info; kernel_info.m = params.M; @@ -216,11 +227,39 @@ public: kernel_info.broadcast_bias = true; kernel_info.activation_info = act_info; + if (rhs_info.h0 == 0) + { + rhs_info.h0 = std::max(kernel_info.n / rhs_info.n0, 1U); + } + // Initialise rhs_reshaped tensor info - auto_init_if_empty(*rhs_reshaped.info(), rhs.info()->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*rhs.info(), rhs_info))); + rhs_reshaped.allocator()->init( + TensorInfo(compute_rhs_reshaped_shape(*rhs.info(), rhs_info), 1, params.data_type)); + + if (rhs_info.export_to_cl_image) + { + if 
(!examples::gemm_tuner_helpers::update_padding_for_cl_image(rhs_reshaped.info())) + { + std::cerr << "cl_image is not supported on the device, disable export_to_cl_image" << std::endl; + return false; + } + } + + // Validate arguments + Status status{}; + status = gemm.validate(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, + rhs_info, kernel_info); + if (!status) + { + // Unsupported arguments + std::cerr << "Unsupported arguments." << std::endl; + std::cerr << "Check documentation for supported/unsupported combinations" << std::endl; + return false; + } // Configure function - gemm.configure(&lhs, &rhs_reshaped, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info); + gemm.configure(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, + kernel_info); // Allocate tensors lhs.allocator()->allocate(); @@ -234,7 +273,8 @@ public: void do_run() override { // Execute the function - gemm.run(); + ITensorPack gemm_pack({{ACL_SRC_0, &lhs}, {ACL_SRC_1, &rhs_reshaped}, {ACL_SRC_2, &bias}, {ACL_DST, &dst}}); + gemm.run(gemm_pack); // Make sure all the OpenCL jobs are done executing: CLScheduler::get().sync(); @@ -257,7 +297,7 @@ private: /** Main program for gemm reshaped rhs only test * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] M, [optional] N, [optional] K, [optional] B, [optional] m0, [optional] n0, [optional] k0, [optional] h0, [optional] interleave_rhs, [optional] transpose_rhs ) + * @param[in] argv Arguments ( [optional] M, [optional] N, [optional] K, [optional] B, [optional] m0, [optional] n0, [optional] k0, [optional] h0, [optional] interleave_rhs, [optional] transpose_rhs, [optional] export_to_cl_image ) */ int main(int argc, char **argv) { diff --git a/examples/gemm_tuner/cl_gemmlowp_reshaped.cpp b/examples/gemm_tuner/cl_gemmlowp_reshaped.cpp new file mode 100644 index 0000000000..3808b98b7d --- /dev/null +++ b/examples/gemm_tuner/cl_gemmlowp_reshaped.cpp @@ -0,0 +1,335 @@ +/* + * Copyright (c) 2020-2021, 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
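The m0/n0/k0/v0/h0 options that keep reappearing in these tuner files all describe one blocked layout, so a compact sketch may help. It paraphrases compute_lhs_reshaped_shape()/compute_rhs_reshaped_shape() from arm_compute/core/utils/misc/ShapeCalculator.h and is meant as an illustration of the shape arithmetic, not as the library implementation:

    #include <cstddef>

    std::size_t ceil_div(std::size_t a, std::size_t b) { return (a + b - 1) / b; }

    // LHS (M x K) is tiled into m0 x k0 blocks; v0 vertical blocks share one output row.
    std::size_t lhs_reshaped_width(std::size_t K, std::size_t m0, std::size_t k0, std::size_t v0)
    {
        return m0 * k0 * ceil_div(K, k0) * v0;
    }
    std::size_t lhs_reshaped_height(std::size_t M, std::size_t m0, std::size_t v0)
    {
        return ceil_div(ceil_div(M, m0), v0);
    }

    // RHS (K x N) is tiled into k0 x n0 blocks; h0 horizontal blocks share one output row.
    std::size_t rhs_reshaped_width(std::size_t K, std::size_t n0, std::size_t k0, std::size_t h0)
    {
        return n0 * k0 * ceil_div(K, k0) * h0;
    }
    std::size_t rhs_reshaped_height(std::size_t N, std::size_t n0, std::size_t h0)
    {
        return ceil_div(ceil_div(N, n0), h0);
    }

Note also that the examples treat h0 == 0 as "use as many blocks per row as fit", falling back to h0 = max(N / n0, 1), as visible in the hunks above and below.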
+ */ +#ifndef ARM_COMPUTE_CL /* Needed by Utils.cpp to handle OpenCL exceptions properly */ +#error "This example needs to be built with -DARM_COMPUTE_CL" +#endif /* ARM_COMPUTE_CL */ + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/KernelDescriptors.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/CL/CLTuner.h" + +#include "examples/gemm_tuner/CommonGemmExampleOptions.h" +#include "examples/gemm_tuner/GemmTunerHelpers.h" +#include "src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedKernel.h" +#include "src/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h" +#include "tests/CL/Helper.h" +#include "utils/command_line/CommandLineOptions.h" +#include "utils/command_line/CommandLineParser.h" +#include "utils/Utils.h" + +#include <cstdlib> + +using namespace arm_compute; +using namespace arm_compute::opencl::kernels; +using namespace utils; +using namespace arm_compute::misc::shape_calculator; +using namespace gemm_tuner; + +namespace +{ +/** Structure holding all tunable gemm configs specific to this example/strategy */ +struct GemmConfigs +{ + size_t m0{4}; /**< Number of rows processed by the matrix multiplication */ + size_t n0{4}; /**< Number of columns processed by the matrix multiplication */ + size_t k0{4}; /**< Number of partial accumulations performed by the matrix multiplication */ + size_t v0{1}; /**< Number of vertical blocks of size (m0xk0) stored on the same output row */ + size_t h0{1}; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */ + bool interleave_lhs{true}; /**< Interleave lhs matrix */ + bool transpose_lhs{true}; /**< Transpose lhs matrix. */ + bool interleave_rhs{true}; /**< Interleave rhs matrix */ + bool transpose_rhs{true}; /**< Transpose rhs matrix. */ +}; + +/** Formatted output of the GemmConfigs type + * + * @param[out] os Output stream. + * @param[in] configs Tunable configurations to output + * + * @return Modified output stream. + */ +::std::ostream &operator<<(::std::ostream &os, const GemmConfigs &configs) +{ + std::string false_str = std::string("false"); + std::string true_str = std::string("true"); + + os << "m0 : " << configs.m0 << std::endl; + os << "n0 : " << configs.n0 << std::endl; + os << "k0 : " << configs.k0 << std::endl; + os << "v0 : " << configs.v0 << std::endl; + os << "h0 : " << configs.h0 << std::endl; + os << "interleave_lhs : " << (configs.interleave_lhs ? true_str : false_str) << std::endl; + os << "transpose_lhs : " << (configs.transpose_lhs ? true_str : false_str) << std::endl; + os << "interleave_rhs : " << (configs.interleave_rhs ? true_str : false_str) << std::endl; + os << "transpose_rhs : " << (configs.transpose_rhs ? true_str : false_str) << std::endl; + return os; +} + +/** Command line options for gemm configs */ +class GemmConfigOptions +{ +public: + /** Constructor + * + * @param[in,out] parser A parser on which "parse()" hasn't been called yet. 
+ */ + GemmConfigOptions(CommandLineParser &parser) + : m0(parser.add_positional_option<SimpleOption<size_t>>("m0", 4)), + n0(parser.add_positional_option<SimpleOption<size_t>>("n0", 4)), + k0(parser.add_positional_option<SimpleOption<size_t>>("k0", 4)), + v0(parser.add_positional_option<SimpleOption<size_t>>("v0", 1)), + h0(parser.add_positional_option<SimpleOption<size_t>>("h0", 1)), + interleave_lhs(parser.add_positional_option<SimpleOption<size_t>>("interleave_lhs", 1)), + interleave_rhs(parser.add_positional_option<SimpleOption<size_t>>("interleave_rhs", 1)), + transpose_rhs(parser.add_positional_option<SimpleOption<size_t>>("transpose_rhs", 1)) + { + m0->set_help("Number of rows processed by the matrix multiplication"); + n0->set_help("Number of columns processed by the matrix multiplication"); + k0->set_help("Number of partial accumulations performed by the matrix multiplication"); + v0->set_help("Number of vertical blocks of size (m0xk0) stored on the same output row"); + h0->set_help("Number of horizontal blocks of size (k0xn0) stored on the same output row"); + interleave_lhs->set_help("Interleave lhs matrix (1) / Do not interleave lhs matrix (0)"); + interleave_rhs->set_help("Interleave rhs matrix (1) / Do not interleave rhs matrix (0)"); + // FIXME: Currently we only support 2 variants of the gemm reshaped kernels in which transpose_lhs and + // transpose_rhs are the opposites of each other. In the future we may extend the kernels to include the other + // 2 variants (both transposed and none transposed) + transpose_rhs->set_help("Transpose rhs matrix but not lhs matrix (1) / Do not transpose rhs matrix but do " + "transpose lhs matrix (0)"); + } + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GemmConfigOptions(const GemmConfigOptions &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GemmConfigOptions &operator=(const GemmConfigOptions &) = delete; + /** Allow instances of this class to be moved */ + GemmConfigOptions(GemmConfigOptions &&) = default; + /** Allow instances of this class to be moved */ + GemmConfigOptions &operator=(GemmConfigOptions &&) = default; + /** Default destructor */ + ~GemmConfigOptions() = default; + + SimpleOption<size_t> *m0; /**< Number of rows processed by the matrix multiplication option */ + SimpleOption<size_t> *n0; /**< Number of columns processed by the matrix multiplication option */ + SimpleOption<size_t> *k0; /**< Number of partial accumulations performed by the matrix multiplication option */ + SimpleOption<size_t> *v0; /**< Number of vertical blocks of size (m0xk0) stored on the same output row option */ + SimpleOption<size_t> *h0; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row option */ + SimpleOption<size_t> *interleave_lhs; /**< Interleave lhs matrix option (1 enable; 0 disable) */ + SimpleOption<size_t> *interleave_rhs; /**< Interleave rhs matrix option (1 enable; 0 disable) */ + // FIXME: Currently we only support 2 variants of the gemm reshaped kernels in which transpose_lhs and + // transpose_rhs are the opposites of each other. In the future we may extend the kernels to include the other + // 2 variants (both transposed and none transposed) + SimpleOption<size_t> * + transpose_rhs; /**< Transpose rhs matrix option (1 enable; 0 disable). Also set the lhs matrix transpose option to the opposite. 
*/ +}; + +/** Consumes the gemm configuration options and creates a structure containing all information + * + * @param[in] options Options to consume + * + * @return Structure containing the gemm configurations + */ +GemmConfigs consume_gemm_configs(const GemmConfigOptions &options) +{ + GemmConfigs configs; + configs.m0 = options.m0->value(); + configs.n0 = options.n0->value(); + configs.k0 = options.k0->value(); + configs.v0 = options.v0->value(); + configs.h0 = options.h0->value(); + configs.interleave_lhs = options.interleave_lhs->value() != 0; + // FIXME: Currently we only support 2 variants of the gemm reshaped kernels in which transpose_lhs and + // transpose_rhs are the opposites of each other. In the future we may extend the kernels to include the other + // 2 variants (both transposed and none transposed) + configs.transpose_lhs = options.transpose_rhs->value() == 0; + configs.interleave_rhs = options.interleave_rhs->value() != 0; + configs.transpose_rhs = options.transpose_rhs->value() != 0; + return configs; +} + +} // namespace + +using ClGemmReshapeLHSMatrix = test::CLSynthetizeOperator<ClGemmReshapeLhsMatrixKernel>; +using ClGemmLowpMatrixMultiplyReshaped = test::CLSynthetizeOperator<ClGemmLowpMatrixMultiplyReshapedKernel>; + +class CLGEMMLowpMatrixMultiplyReshapedExample : public Example +{ +public: + bool do_setup(int argc, char **argv) override + { + // Default parameters + CommonGemmExampleParams params; + GemmConfigs configs; + + // Parse command line options + CommandLineParser parser; + CommonGemmExampleOptions param_options(parser, DataType::QASYMM8); + GemmConfigOptions config_options(parser); + + parser.parse(argc, argv); + if (param_options.help->is_set() && param_options.help->value()) + { + parser.print_help(argv[0]); + return false; + } + if (!parser.validate()) + { + // Invalid arguments. Use default parameters and configs + std::cerr << "Invalid arguments." 
<< std::endl; + parser.print_help(argv[0]); + std::cerr << "Falling back to default parameters and configs" << std::endl; + } + else + { + params = consume_common_gemm_example_parameters(param_options); + configs = consume_gemm_configs(config_options); + } + + std::cout << "Gemm parameters:" << std::endl; + std::cout << params << std::endl; + std::cout << "Gemm configurations:" << std::endl; + std::cout << configs << std::endl; + + tuner.set_tuner_mode(params.tuner_mode); + + CLScheduler::get().default_init(&tuner); + + lhs.allocator()->init(TensorInfo(TensorShape(params.K, params.M, params.B), 1, params.data_type)); + rhs.allocator()->init(TensorInfo(TensorShape(params.N, params.K, params.B), 1, params.data_type)); + + // Set arbitrary quantization information + const QuantizationInfo q_info{0.012, 3}; + lhs.info()->set_quantization_info(q_info); + rhs.info()->set_quantization_info(q_info); + dst.info()->set_quantization_info(q_info); + + GEMMLHSMatrixInfo lhs_info; + lhs_info.m0 = configs.m0; + lhs_info.k0 = configs.k0; + lhs_info.v0 = configs.v0; + lhs_info.interleave = configs.interleave_lhs; + lhs_info.transpose = configs.transpose_lhs; + + GEMMRHSMatrixInfo rhs_info; + rhs_info.n0 = configs.n0; + rhs_info.k0 = configs.k0; + rhs_info.h0 = configs.h0; + rhs_info.interleave = configs.interleave_rhs; + rhs_info.transpose = configs.transpose_rhs; + rhs_info.export_to_cl_image = false; // CL image not supported for quantized cases yet + + if (rhs_info.h0 == 0) + { + rhs_info.h0 = std::max(static_cast<unsigned int>(params.N) / rhs_info.n0, 1U); + } + + lhs_reshaped.allocator()->init( + TensorInfo(compute_lhs_reshaped_shape(*lhs.info(), lhs_info), 1, params.data_type)); + rhs_reshaped.allocator()->init( + TensorInfo(compute_rhs_reshaped_shape(*rhs.info(), rhs_info), 1, params.data_type)); + lhs_reshaped.info()->set_quantization_info(q_info); + rhs_reshaped.info()->set_quantization_info(q_info); + + if (rhs_info.export_to_cl_image) + { + if (!examples::gemm_tuner_helpers::update_padding_for_cl_image(rhs_reshaped.info())) + { + std::cerr << "cl_image is not supported on the device, disable export_to_cl_image" << std::endl; + return false; + } + } + + GEMMReshapeInfo gemm_info{static_cast<int>(params.M), + static_cast<int>(params.N), + static_cast<int>(params.K), + static_cast<int>(configs.h0), + static_cast<int>(configs.v0), + 0, + false, + true}; + + // Validate arguments + if (!reshape_lhs.validate(lhs.info(), lhs_reshaped.info(), lhs_info, gemm_info.reinterpret_input_as_3d())) + { + std::cerr << "Invalid arguments for ClGemmReshapeLhsMatrixKernel." << std::endl; + return false; + } + + if (!gemm.validate(lhs_reshaped.info(), rhs_reshaped.info(), dst.info(), lhs_info, rhs_info, gemm_info)) + { + std::cerr << "Invalid arguments for ClGemmLowpMatrixMultiplyReshapedKernel."
<< std::endl; + return false; + } + + // Configure functions + reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info); + + gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), dst.info(), lhs_info, rhs_info, gemm_info); + + // Allocate tensors + lhs.allocator()->allocate(); + rhs.allocator()->allocate(); + lhs_reshaped.allocator()->allocate(); + rhs_reshaped.allocator()->allocate(); + dst.allocator()->allocate(); + + return true; + } + void do_run() override + { + ITensorPack reshape_lhs_pack({{ACL_SRC, &lhs}, {ACL_DST, &lhs_reshaped}}); + reshape_lhs.run(reshape_lhs_pack); + + ITensorPack gemm_pack({{ACL_SRC_0, &lhs_reshaped}, {ACL_SRC_1, &rhs_reshaped}, {ACL_DST, &dst}}); + gemm.run(gemm_pack); + + // Make sure all the OpenCL jobs are done executing: + CLScheduler::get().sync(); + } + + void do_teardown() override + { + } + +private: + CLTensor lhs{}; + CLTensor rhs{}; + CLTensor lhs_reshaped{}; + CLTensor rhs_reshaped{}; + CLTensor dst{}; + CLTuner tuner{}; + ClGemmReshapeLHSMatrix reshape_lhs{}; + ClGemmLowpMatrixMultiplyReshaped gemm{}; +}; + +/** Main test program for gemmlowp reshaped + * + * @param[in] argc Number of arguments + * @param[in] argv Arguments ( [optional] M, [optional] N, [optional] K, [optional] B, [optional] m0, [optional] n0, [optional] k0, [optional] v0, [optional] h0, [optional] interleave_lhs, [optional] interleave_rhs, [optional] transpose_rhs ) + */ +int main(int argc, char **argv) +{ + return run_example<CLGEMMLowpMatrixMultiplyReshapedExample>(argc, argv); +} diff --git a/examples/gemm_tuner/cl_gemmlowp_reshaped_rhs_only_fused_output_stage_fixedpoint.cpp b/examples/gemm_tuner/cl_gemmlowp_reshaped_rhs_only_fused_output_stage_fixedpoint.cpp new file mode 100644 index 0000000000..4acb316a3c --- /dev/null +++ b/examples/gemm_tuner/cl_gemmlowp_reshaped_rhs_only_fused_output_stage_fixedpoint.cpp @@ -0,0 +1,391 @@ +/* + * Copyright (c) 2020-2021, 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
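The file below benchmarks a quantized GEMM whose offset contributions and fixed-point requantization are fused into the matrix-multiply kernel. The standard gemmlowp identity, sketched here as background rather than code from this commit, is why the example conditionally prepares vector_sum_row / vector_sum_col:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // With asymmetric quantization, expanding sum_k (lhs_ik - a_off) * (rhs_kj - b_off) gives
    //   acc_ij = sum_k lhs_ik * rhs_kj
    //            - b_off * row_sum(lhs)_i   // vector_sum_row, needed only when b_off != 0
    //            - a_off * col_sum(rhs)_j   // vector_sum_col, needed only when a_off != 0
    //            + K * a_off * b_off
    // A scalar reference for one output element, for intuition only:
    std::int32_t quantized_dot(const std::vector<std::uint8_t> &lhs_row,
                               const std::vector<std::uint8_t> &rhs_col,
                               std::int32_t a_off, std::int32_t b_off)
    {
        std::int32_t acc = 0;
        for (std::size_t k = 0; k < lhs_row.size(); ++k)
        {
            acc += (static_cast<std::int32_t>(lhs_row[k]) - a_off) *
                   (static_cast<std::int32_t>(rhs_col[k]) - b_off);
        }
        return acc;
    }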
+ */ +#ifndef ARM_COMPUTE_CL /* Needed by Utils.cpp to handle OpenCL exceptions properly */ +#error "This example needs to be built with -DARM_COMPUTE_CL" +#endif /* ARM_COMPUTE_CL */ + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/KernelDescriptors.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/CL/CLTuner.h" + +#include "src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel.h" +#include "src/gpu/cl/kernels/ClGemmLowpReductionKernel.h" +#include "tests/CL/Helper.h" +#include "utils/command_line/CommandLineOptions.h" +#include "utils/command_line/CommandLineParser.h" +#include "utils/Utils.h" + +#include "CommonGemmExampleOptions.h" +#include "GemmTunerHelpers.h" +#include <cstdlib> +#include <memory> + +using namespace arm_compute; +using namespace utils; +using namespace arm_compute::opencl::kernels; +using namespace arm_compute::misc::shape_calculator; +using namespace gemm_tuner; + +namespace +{ +/** Structure holding all tunable gemm configs specific to this example/strategy */ +struct GemmConfigs +{ + size_t m0{4}; /**< Number of rows processed by the matrix multiplication */ + size_t n0{4}; /**< Number of columns processed by the matrix multiplication */ + size_t k0{4}; /**< Number of partial accumulations performed by the matrix multiplication */ + size_t h0{1}; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */ + bool interleave_rhs{true}; /**< Interleave rhs matrix */ + bool transpose_rhs{true}; /**< Transpose rhs matrix */ +}; + +/** Formatted output of the GemmConfigs type + * + * @param[out] os Output stream. + * @param[in] configs Tunable configurations to output + * + * @return Modified output stream. + */ +::std::ostream &operator<<(::std::ostream &os, const GemmConfigs &configs) +{ + std::string false_str = std::string("false"); + std::string true_str = std::string("true"); + + os << "m0 : " << configs.m0 << std::endl; + os << "n0 : " << configs.n0 << std::endl; + os << "k0 : " << configs.k0 << std::endl; + os << "h0 : " << configs.h0 << std::endl; + os << "interleave_rhs : " << (configs.interleave_rhs ? true_str : false_str) << std::endl; + os << "transpose_rhs : " << (configs.transpose_rhs ? true_str : false_str) << std::endl; + return os; +} + +/** Command line options for gemm configs */ +class GemmConfigOptions +{ +public: + /** Constructor + * + * @param[in,out] parser A parser on which "parse()" hasn't been called yet. 
+ */ + GemmConfigOptions(CommandLineParser &parser) + : m0(parser.add_positional_option<SimpleOption<size_t>>("m0", 4)), + n0(parser.add_positional_option<SimpleOption<size_t>>("n0", 4)), + k0(parser.add_positional_option<SimpleOption<size_t>>("k0", 4)), + h0(parser.add_positional_option<SimpleOption<size_t>>("h0", 1)), + interleave_rhs(parser.add_positional_option<SimpleOption<size_t>>("interleave_rhs", 1)), + transpose_rhs(parser.add_positional_option<SimpleOption<size_t>>("transpose_rhs", 1)) + { + m0->set_help("Number of rows processed by the matrix multiplication"); + n0->set_help("Number of columns processed by the matrix multiplication"); + k0->set_help("Number of partial accumulations performed by the matrix multiplication"); + h0->set_help("Number of horizontal blocks of size (k0xn0) stored on the same output row"); + interleave_rhs->set_help("Interleave rhs matrix (1) / Do not interleave rhs matrix (0)"); + transpose_rhs->set_help("Transpose rhs matrix (1) / Do not transpose rhs matrix (0)"); + } + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GemmConfigOptions(const GemmConfigOptions &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GemmConfigOptions &operator=(const GemmConfigOptions &) = delete; + /** Allow instances of this class to be moved */ + GemmConfigOptions(GemmConfigOptions &&) = default; + /** Allow instances of this class to be moved */ + GemmConfigOptions &operator=(GemmConfigOptions &&) = default; + /** Default destructor */ + ~GemmConfigOptions() = default; + + SimpleOption<size_t> *m0; /**< Number of rows processed by the matrix multiplication option */ + SimpleOption<size_t> *n0; /**< Number of columns processed by the matrix multiplication option */ + SimpleOption<size_t> *k0; /**< Number of partial accumulations performed by the matrix multiplication option */ + SimpleOption<size_t> *h0; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row option */ + SimpleOption<size_t> *interleave_rhs; /**< Interleave rhs matrix option (1 enable; 0 disable) */ + SimpleOption<size_t> *transpose_rhs; /**< Transpose rhs matrix option (1 enable; 0 disable) */ +}; + +/** Consumes the gemm configuration options and creates a structure containing all information + * + * @param[in] options Options to consume + * + * @return Structure containing the gemm configurations + */ +GemmConfigs consume_gemm_configs(const GemmConfigOptions &options) +{ + GemmConfigs configs; + configs.m0 = options.m0->value(); + configs.n0 = options.n0->value(); + configs.k0 = options.k0->value(); + configs.h0 = options.h0->value(); + configs.interleave_rhs = options.interleave_rhs->value() != 0; + configs.transpose_rhs = options.transpose_rhs->value() != 0; + return configs; +} + +} // namespace + +using ClGemmLowpMatrixMultiplyReshapedOnlyRhs = + test::CLSynthetizeOperator<ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel>; +using ClGemmLowpMatrixAReduction = test::CLSynthetizeOperator<ClGemmLowpMatrixAReductionKernel>; + +class CLGEMMLowpMatrixMultiplyReshapedOnlyRHSFusedOutputStageFixedpointExample : public Example +{ +public: + bool do_setup(int argc, char **argv) override + { + // Default parameters + CommonGemmExampleParams params; + GemmConfigs configs; + + // Parse command line options + CommandLineParser parser; + CommonGemmExampleOptions param_options(parser, DataType::QASYMM8); + GemmConfigOptions config_options(parser); + + parser.parse(argc, argv); + if 
(param_options.help->is_set() && param_options.help->value()) + { + parser.print_help(argv[0]); + return false; + } + if (!parser.validate()) + { + // Invalid arguments. Use default parameters and configs + std::cerr << "Invalid arguments." << std::endl; + parser.print_help(argv[0]); + std::cerr << "Falling back to default parameters and configs" << std::endl; + } + else + { + params = consume_common_gemm_example_parameters(param_options); + configs = consume_gemm_configs(config_options); + } + + std::cout << "Gemm parameters:" << std::endl; + std::cout << params << std::endl; + std::cout << "Gemm configurations:" << std::endl; + std::cout << configs << std::endl; + + tuner.set_tuner_mode(params.tuner_mode); + + CLScheduler::get().default_init(&tuner); + + lhs.allocator()->init(TensorInfo(TensorShape(params.K, params.M, params.B), 1, params.data_type)); + rhs.allocator()->init(TensorInfo(TensorShape(params.N, params.K, params.B), 1, params.data_type)); + bias.allocator()->init(TensorInfo(TensorShape(params.N), 1, DataType::S32)); + dst.allocator()->init(TensorInfo(TensorShape(params.N, params.M, params.B), 1, params.data_type)); + + // Set arbitrary quantization information (non-zero offset to ensure offset contribution stage is included) + // Could be extended in the future to include a user-controlled option for offset == 0 + const QuantizationInfo q_info{0.012, 3}; + lhs.info()->set_quantization_info(q_info); + rhs.info()->set_quantization_info(q_info); + bias.info()->set_quantization_info(q_info); + dst.info()->set_quantization_info(q_info); + + GEMMLHSMatrixInfo lhs_info; + lhs_info.m0 = configs.m0; + lhs_info.k0 = configs.k0; + + GEMMRHSMatrixInfo rhs_info; + rhs_info.n0 = configs.n0; + rhs_info.k0 = configs.k0; + rhs_info.h0 = configs.h0; + rhs_info.interleave = configs.interleave_rhs; + rhs_info.transpose = configs.transpose_rhs; + rhs_info.export_to_cl_image = false; // CL image not supported for quantized cases yet + + if (rhs_info.h0 == 0) + { + rhs_info.h0 = std::max(static_cast<unsigned int>(params.N) / rhs_info.n0, 1U); + } + + rhs_reshaped.allocator()->init( + TensorInfo(compute_rhs_reshaped_shape(*rhs.info(), rhs_info), 1, params.data_type)); + rhs_reshaped.info()->set_quantization_info(q_info); + if (rhs_info.export_to_cl_image) + { + if (!examples::gemm_tuner_helpers::update_padding_for_cl_image(rhs_reshaped.info())) + { + std::cerr << "cl_image is not supported on the device, disable export_to_cl_image" << std::endl; + return false; + } + } + + // Configure output stage for quantized case + GEMMLowpOutputStageInfo gemmlowp_output_stage; + gemmlowp_output_stage.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT; + gemmlowp_output_stage.output_data_type = dst.info()->data_type(); + gemmlowp_output_stage.gemmlowp_offset = 0; + { + gemmlowp_output_stage.is_quantized_per_channel = false; + // Num_filters is 1 unless quantized type is of per_channel type. Could be extended in the future to support per-channel quantization. 
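As context for the output-stage setup that follows, here is a minimal sketch of the decomposition performed per filter by quantization::compute_quantized_multipliers_and_shifts(), assuming the usual gemmlowp convention and a rescale factor below 1; it is illustrative, not the library code. The real factor (lhs_scale * rhs_scale) / dst_scale becomes an integer multiplier plus shift so the fused stage needs no floating point:

    #include <cmath>
    #include <cstdint>

    // Decompose real_multiplier (assumed < 1) into quant_multiplier * 2^(-31 - right_shift),
    // where quant_multiplier is a Q0.31 fixed-point value.
    void decompose_multiplier(double real_multiplier, std::int32_t &quant_multiplier, std::int32_t &right_shift)
    {
        int exponent = 0;
        const double significand = std::frexp(real_multiplier, &exponent); // in [0.5, 1)
        right_shift = -exponent;
        std::int64_t q = std::llround(significand * (1ll << 31));
        if (q == (1ll << 31)) // rounding can push the significand up to exactly 2^31
        {
            q /= 2;
            --right_shift;
        }
        quant_multiplier = static_cast<std::int32_t>(q);
    }

    // The QUANTIZE_DOWN_FIXEDPOINT stage then computes, per int32 accumulator, roughly:
    //   dst = clamp((rounding_doubling_high_mul(acc, quant_multiplier) >> right_shift)
    //               + gemmlowp_offset, gemmlowp_min_bound, gemmlowp_max_bound)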
+ const unsigned int num_filters = 1; + + dst_multipliers.allocator()->init(TensorInfo(TensorShape(num_filters), 1, DataType::S32)); + dst_shifts.allocator()->init(TensorInfo(TensorShape(num_filters), 1, DataType::S32)); + + gemmlowp_output_stage.gemmlowp_multipliers.resize(num_filters); + gemmlowp_output_stage.gemmlowp_shifts.resize(num_filters); + quantization::compute_quantized_multipliers_and_shifts(lhs.info(), rhs.info(), dst.info(), + gemmlowp_output_stage.gemmlowp_multipliers.data(), + gemmlowp_output_stage.gemmlowp_shifts.data()); + gemmlowp_output_stage.gemmlowp_multiplier = gemmlowp_output_stage.gemmlowp_multipliers[0]; + gemmlowp_output_stage.gemmlowp_shift = gemmlowp_output_stage.gemmlowp_shifts[0]; + + // No fused activation + PixelValue min_val{}; + PixelValue max_val{}; + std::tie(min_val, max_val) = get_min_max(dst.info()->data_type()); + + auto min_activation = min_val.get<int32_t>(); + auto max_activation = max_val.get<int32_t>(); + + // Set the GEMMLowp output stage info + gemmlowp_output_stage.gemmlowp_offset = dst.info()->quantization_info().uniform().offset; + gemmlowp_output_stage.gemmlowp_min_bound = min_activation; + gemmlowp_output_stage.gemmlowp_max_bound = max_activation; + } + + GEMMKernelInfo gemm_info; + gemm_info.m = params.M; + gemm_info.n = params.N; + gemm_info.k = params.K; + gemm_info.depth_output_gemm3d = 0; + gemm_info.reinterpret_input_as_3d = false; + gemm_info.broadcast_bias = true; + gemm_info.fp_mixed_precision = false; + gemm_info.has_pad_y = false; + gemm_info.mult_transpose1xW_width = configs.h0; + gemm_info.lhs_info = lhs_info; + gemm_info.rhs_info = rhs_info; + gemm_info.a_offset = lhs.info()->quantization_info().uniform().offset; + gemm_info.b_offset = rhs.info()->quantization_info().uniform().offset; + gemm_info.output_stage = gemmlowp_output_stage; + + // Initialize Matrix A reduction kernel only if _b_offset is not equal to 0 + if (gemm_info.b_offset != 0) + { + const TensorInfo info_vector_sum_row(compute_reductionB_shape(*lhs.info()), 1, DataType::S32); + vector_sum_row.allocator()->init(info_vector_sum_row); + + mtx_a_reduction = std::make_unique<ClGemmLowpMatrixAReduction>(); + + if (!mtx_a_reduction->validate(lhs.info(), vector_sum_row.info(), GEMMLowpReductionKernelInfo{})) + { + std::cerr << "Invalid arguments for ClGemmLowpMatrixAReductionKernel." << std::endl; + return false; + } + + mtx_a_reduction->configure(lhs.info(), vector_sum_row.info(), GEMMLowpReductionKernelInfo{}); + } + // Initialize matrix B reduction kernel only if _a_offset is not equal to 0 + if (gemm_info.a_offset != 0) + { + const TensorInfo info_vector_sum_col(compute_reductionA_shape(*rhs.info()), 1, DataType::S32); + vector_sum_col.allocator()->init(info_vector_sum_col); + // There's no need for a Matrix B reduction kernel as this is assumed to be run only once in the prepare stage + } + + // Validate arguments + if (!gemm.validate(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info, + gemm_info.a_offset == 0 ? nullptr : vector_sum_col.info(), + gemm_info.b_offset == 0 ? nullptr : vector_sum_row.info(), bias.info(), + dst_multipliers.info(), dst_shifts.info())) + { + std::cerr << "Invalid arguments for ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel." << std::endl; + return false; + } + + // Configure function + gemm.configure(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info, + gemm_info.a_offset == 0 ? nullptr : vector_sum_col.info(), + gemm_info.b_offset == 0 ?
nullptr : vector_sum_row.info(), bias.info(), dst_multipliers.info(), + dst_shifts.info()); + + // Allocate tensors + lhs.allocator()->allocate(); + rhs.allocator()->allocate(); + rhs_reshaped.allocator()->allocate(); + bias.allocator()->allocate(); + dst.allocator()->allocate(); + vector_sum_col.allocator()->allocate(); + vector_sum_row.allocator()->allocate(); + dst_multipliers.allocator()->allocate(); + dst_shifts.allocator()->allocate(); + + return true; + } + void do_run() override + { + if (mtx_a_reduction != nullptr) + { + // The reduction kernel is configured with vector_sum_row as its destination + ITensorPack red_pack({{ACL_SRC, &lhs}, {ACL_DST, &vector_sum_row}}); + mtx_a_reduction->run(red_pack); + } + + // The gemm kernel is configured with the reshaped RHS as its second source + ITensorPack gemm_pack({{ACL_SRC_0, &lhs}, + {ACL_SRC_1, &rhs_reshaped}, + {ACL_BIAS, &bias}, + {ACL_VEC_COL_SUM, &vector_sum_col}, + {ACL_VEC_ROW_SUM, &vector_sum_row}, + {ACL_SHIFTS, &dst_shifts}, + {ACL_MULTIPLIERS, &dst_multipliers}, + {ACL_DST, &dst}}); + gemm.run(gemm_pack); + + // Make sure all the OpenCL jobs are done executing: + CLScheduler::get().sync(); + } + + void do_teardown() override + { + } + +private: + CLTensor lhs{}; + CLTensor rhs{}; + CLTensor rhs_reshaped{}; + CLTensor bias{}; + CLTensor dst{}; + CLTensor vector_sum_col{}; + CLTensor vector_sum_row{}; + CLTensor dst_multipliers{}; + CLTensor dst_shifts{}; + CLTuner tuner{}; + ClGemmLowpMatrixMultiplyReshapedOnlyRhs gemm{}; + std::unique_ptr<ClGemmLowpMatrixAReduction> mtx_a_reduction{nullptr}; +}; + +/** Main test program for gemmlowp reshaped rhs only with fused output stage fixedpoint + * + * @param[in] argc Number of arguments + * @param[in] argv Arguments ( [optional] M, [optional] N, [optional] K, [optional] B, [optional] m0, [optional] n0, [optional] k0, [optional] h0, [optional] interleave_rhs, [optional] transpose_rhs ) + */ +int main(int argc, char **argv) +{ + return run_example<CLGEMMLowpMatrixMultiplyReshapedOnlyRHSFusedOutputStageFixedpointExample>(argc, argv); +} diff --git a/examples/graph_alexnet.cpp b/examples/graph_alexnet.cpp index 25ede6dca2..be0b8a7d8a 100644 --- a/examples/graph_alexnet.cpp +++ b/examples/graph_alexnet.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited.
* * SPDX-License-Identifier: MIT * @@ -39,8 +39,7 @@ using namespace arm_compute::graph_utils; class GraphAlexnetExample : public Example { public: - GraphAlexnetExample() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "AlexNet") + GraphAlexnetExample() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "AlexNet") { } bool do_setup(int argc, char **argv) override @@ -53,14 +52,15 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; } // Checks - ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "QASYMM8 not supported for this graph"); + ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), + "QASYMM8 not supported for this graph"); // Print parameter values std::cout << common_params << std::endl; @@ -69,88 +69,80 @@ public: std::string data_path = common_params.data_path; // Create a preprocessor object - const std::array<float, 3> mean_rgb{ { 122.68f, 116.67f, 104.01f } }; - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb); + const std::array<float, 3> mean_rgb{{122.68f, 116.67f, 104.01f}}; + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<CaffePreproccessor>(mean_rgb); // Create input descriptor const auto operation_layout = common_params.data_layout; - const TensorShape tensor_shape = permute_shape(TensorShape(227U, 227U, 3U, 1U), DataLayout::NCHW, operation_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(227U, 227U, 3U, common_params.batches), DataLayout::NCHW, operation_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NCHW; - graph << common_params.target - << common_params.fast_math_hint - << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor))) - // Layer 1 - << ConvolutionLayer( - 11U, 11U, 96U, - get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv1_b.npy"), - PadStrideInfo(4, 4, 0, 0)) - .set_name("conv1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu1") - << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)).set_name("norm1") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0))).set_name("pool1") - // Layer 2 - << ConvolutionLayer( - 5U, 5U, 256U, - get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv2_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv2_b.npy"), - PadStrideInfo(1, 1, 2, 2), 2) - .set_name("conv2") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu2") - << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)).set_name("norm2") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0))).set_name("pool2") - // Layer 3 - << ConvolutionLayer( - 3U, 3U, 384U, - 
get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv3_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv3_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv3") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu3") - // Layer 4 - << ConvolutionLayer( - 3U, 3U, 384U, - get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv4_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv4_b.npy"), - PadStrideInfo(1, 1, 1, 1), 2) - .set_name("conv4") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu4") - // Layer 5 - << ConvolutionLayer( - 3U, 3U, 256U, - get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv5_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv5_b.npy"), - PadStrideInfo(1, 1, 1, 1), 2) - .set_name("conv5") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu5") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0))).set_name("pool5") - // Layer 6 - << FullyConnectedLayer( - 4096U, - get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc6_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc6_b.npy")) - .set_name("fc6") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu6") - // Layer 7 - << FullyConnectedLayer( - 4096U, - get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc7_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc7_b.npy")) - .set_name("fc7") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu7") - // Layer 8 - << FullyConnectedLayer( - 1000U, - get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc8_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc8_b.npy")) - .set_name("fc8") - // Softmax - << SoftmaxLayer().set_name("prob") - << OutputLayer(get_output_accessor(common_params, 5)); + graph + << common_params.target << common_params.fast_math_hint + << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor))) + // Layer 1 + << ConvolutionLayer(11U, 11U, 96U, + get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv1_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv1_b.npy"), + PadStrideInfo(4, 4, 0, 0)) + .set_name("conv1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu1") + << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)).set_name("norm1") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0))) + .set_name("pool1") + // Layer 2 + << ConvolutionLayer( + 5U, 5U, 256U, get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv2_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv2_b.npy"), PadStrideInfo(1, 1, 2, 2), 2) + .set_name("conv2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu2") + << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)).set_name("norm2") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0))) + .set_name("pool2") 
+ // Layer 3 + << ConvolutionLayer( + 3U, 3U, 384U, get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv3_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv3_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu3") + // Layer 4 + << ConvolutionLayer( + 3U, 3U, 384U, get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv4_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv4_b.npy"), PadStrideInfo(1, 1, 1, 1), 2) + .set_name("conv4") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu4") + // Layer 5 + << ConvolutionLayer( + 3U, 3U, 256U, get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv5_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv5_b.npy"), PadStrideInfo(1, 1, 1, 1), 2) + .set_name("conv5") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu5") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0))) + .set_name("pool5") + // Layer 6 + << FullyConnectedLayer(4096U, + get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc6_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc6_b.npy")) + .set_name("fc6") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu6") + // Layer 7 + << FullyConnectedLayer(4096U, + get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc7_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc7_b.npy")) + .set_name("fc7") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu7") + // Layer 8 + << FullyConnectedLayer(1000U, + get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc8_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc8_b.npy")) + .set_name("fc8") + // Softmax + << SoftmaxLayer().set_name("prob") << OutputLayer(get_output_accessor(common_params, 5)); // Finalize graph GraphConfig config; @@ -159,10 +151,11 @@ public: config.use_tuner = common_params.enable_tuner; config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; // Load the precompiled kernels from a file into the kernel library; this way, the next time they are needed, // compilation won't be required. - if(common_params.enable_cl_cache) + if (common_params.enable_cl_cache) { #ifdef ARM_COMPUTE_CL restore_program_cache_from_file(); @@ -172,7 +165,7 @@ public: graph.finalize(common_params.target, config); // Save the OpenCL kernels to a file - if(common_opts.enable_cl_cache) + if (common_opts.enable_cl_cache) { #ifdef ARM_COMPUTE_CL save_program_cache_to_file(); diff --git a/examples/graph_deepspeech_v0_4_1.cpp b/examples/graph_deepspeech_v0_4_1.cpp index b655452391..08cd4a47b1 100644 --- a/examples/graph_deepspeech_v0_4_1.cpp +++ b/examples/graph_deepspeech_v0_4_1.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 ARM Limited. + * Copyright (c) 2019-2021 Arm Limited.
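The hunk above threads the new config.mlgo_file option into the AlexNet example next to the existing tuner settings. Distilled from those hunks, the finalize-with-kernel-cache flow shared by these graph examples is sketched below; this is an editorial summary, not part of the patch, assuming a build with ARM_COMPUTE_CL defined and using only names that already appear in the diff (graph, common_params, GraphConfig, and the cache helpers from the examples' utils):

    // Sketch (not part of the patch): configure, restore cache, finalize, save cache.
    GraphConfig config;
    config.num_threads = common_params.threads;      // CPU scheduler threads
    config.use_tuner   = common_params.enable_tuner; // enable the OpenCL tuner
    config.tuner_mode  = common_params.tuner_mode;
    config.tuner_file  = common_params.tuner_file;   // tuning results on disk
    config.mlgo_file   = common_params.mlgo_file;    // MLGO heuristics file (newly wired in)

    // Restore previously compiled OpenCL kernels so finalize() can skip recompiling them.
    if (common_params.enable_cl_cache)
    {
#ifdef ARM_COMPUTE_CL
        restore_program_cache_from_file();
#endif /* ARM_COMPUTE_CL */
    }

    graph.finalize(common_params.target, config);

    // Persist the kernels compiled during finalize() for the next run.
    if (common_params.enable_cl_cache)
    {
#ifdef ARM_COMPUTE_CL
        save_program_cache_to_file();
#endif /* ARM_COMPUTE_CL */
    }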
* * SPDX-License-Identifier: MIT * @@ -23,6 +23,7 @@ */ #include "arm_compute/graph.h" #include "arm_compute/graph/Types.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -37,8 +38,7 @@ using namespace arm_compute::graph_utils; class GraphDeepSpeechExample : public Example { public: - GraphDeepSpeechExample() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "DeepSpeech v0.4.1") + GraphDeepSpeechExample() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "DeepSpeech v0.4.1") { } bool do_setup(int argc, char **argv) override @@ -51,7 +51,7 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; @@ -64,7 +64,7 @@ public: std::string data_path = common_params.data_path; const std::string model_path = "/cnn_data/deepspeech_model/"; - if(!data_path.empty()) + if (!data_path.empty()) { data_path += model_path; } @@ -77,141 +77,143 @@ public: const float cell_clip = 20.f; // Create input descriptor - const TensorShape tensor_shape = permute_shape(TensorShape(26U, 19U, n_steps, 1U), DataLayout::NHWC, common_params.data_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(26U, 19U, n_steps, 1U), DataLayout::NHWC, common_params.data_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NHWC; - graph << common_params.target - << common_params.fast_math_hint + graph << common_params.target << common_params.fast_math_hint << InputLayer(input_descriptor, - get_weights_accessor(data_path, "input_values_x" + std::to_string(n_steps) + ".npy", weights_layout)) - .set_name("input_node"); + get_weights_accessor(data_path, "input_values_x" + std::to_string(n_steps) + ".npy", + weights_layout)) + .set_name("input_node"); - if(common_params.data_layout == DataLayout::NCHW) + if (common_params.data_layout == DataLayout::NCHW) { graph << PermuteLayer(PermutationVector(2U, 0U, 1U), common_params.data_layout).set_name("permute_to_nhwc"); } graph << ReshapeLayer(TensorShape(494U, n_steps)).set_name("Reshape_input") // Layer 1 - << FullyConnectedLayer( - 2048U, - get_weights_accessor(data_path, "h1_transpose.npy", weights_layout), - get_weights_accessor(data_path, "MatMul_bias.npy")) - .set_name("fc0") + << FullyConnectedLayer(2048U, get_weights_accessor(data_path, "h1_transpose.npy", weights_layout), + get_weights_accessor(data_path, "MatMul_bias.npy")) + .set_name("fc0") << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, cell_clip)) - .set_name("Relu") + .set_name("Relu") // Layer 2 - << FullyConnectedLayer( - 2048U, - get_weights_accessor(data_path, "h2_transpose.npy", weights_layout), - get_weights_accessor(data_path, "MatMul_1_bias.npy")) - .set_name("fc1") + << FullyConnectedLayer(2048U, get_weights_accessor(data_path, "h2_transpose.npy", weights_layout), + get_weights_accessor(data_path, "MatMul_1_bias.npy")) + .set_name("fc1") << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, cell_clip)) - .set_name("Relu_1") + .set_name("Relu_1") // Layer 3 - << 
FullyConnectedLayer( - 2048U, - get_weights_accessor(data_path, "h3_transpose.npy", weights_layout), - get_weights_accessor(data_path, "MatMul_2_bias.npy")) - .set_name("fc2") + << FullyConnectedLayer(2048U, get_weights_accessor(data_path, "h3_transpose.npy", weights_layout), + get_weights_accessor(data_path, "MatMul_2_bias.npy")) + .set_name("fc2") << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, cell_clip)) - .set_name("Relu_2") + .set_name("Relu_2") // Layer 4 << ReshapeLayer(TensorShape(2048U, 1U, n_steps)).set_name("Reshape_1"); // Unstack Layer (using SplitLayerNode) - NodeParams unstack_params = { "unstack", graph.hints().target_hint }; - NodeID unstack_nid = GraphBuilder::add_split_node(graph.graph(), unstack_params, { graph.tail_node(), 0 }, n_steps, 2); + NodeParams unstack_params = {"unstack", graph.hints().target_hint}; + NodeID unstack_nid = + GraphBuilder::add_split_node(graph.graph(), unstack_params, {graph.tail_node(), 0}, n_steps, 2); // Create input state descriptor - TensorDescriptor state_descriptor = TensorDescriptor(TensorShape(2048U), common_params.data_type).set_layout(common_params.data_layout); - SubStream previous_state(graph); - SubStream add_y(graph); + TensorDescriptor state_descriptor = + TensorDescriptor(TensorShape(2048U), common_params.data_type).set_layout(common_params.data_layout); + SubStream previous_state(graph); + SubStream add_y(graph); // Initial state for LSTM is all zeroes for both state_h and state_c; therefore, only one input is created - previous_state << InputLayer(state_descriptor, - get_weights_accessor(data_path, "zeros.npy")) - .set_name("previous_state_c_h"); - add_y << InputLayer(state_descriptor, - get_weights_accessor(data_path, "ones.npy")) - .set_name("add_y"); + previous_state << InputLayer(state_descriptor, get_weights_accessor(data_path, "zeros.npy")) + .set_name("previous_state_c_h"); + add_y << InputLayer(state_descriptor, get_weights_accessor(data_path, "ones.npy")).set_name("add_y"); // Create LSTM Fully Connected weights and bias descriptors - TensorDescriptor lstm_weights_descriptor = TensorDescriptor(TensorShape(4096U, 8192U), common_params.data_type).set_layout(common_params.data_layout); - TensorDescriptor lstm_bias_descriptor = TensorDescriptor(TensorShape(8192U), common_params.data_type).set_layout(common_params.data_layout); - SubStream lstm_fc_weights(graph); - SubStream lstm_fc_bias(graph); - lstm_fc_weights << ConstantLayer(lstm_weights_descriptor, - get_weights_accessor(data_path, "rnn_lstm_cell_kernel_transpose.npy", weights_layout)) - .set_name("h5/transpose"); + TensorDescriptor lstm_weights_descriptor = + TensorDescriptor(TensorShape(4096U, 8192U), common_params.data_type).set_layout(common_params.data_layout); + TensorDescriptor lstm_bias_descriptor = + TensorDescriptor(TensorShape(8192U), common_params.data_type).set_layout(common_params.data_layout); + SubStream lstm_fc_weights(graph); + SubStream lstm_fc_bias(graph); + lstm_fc_weights << ConstantLayer( + lstm_weights_descriptor, + get_weights_accessor(data_path, "rnn_lstm_cell_kernel_transpose.npy", weights_layout)) + .set_name("h5/transpose"); lstm_fc_bias << ConstantLayer(lstm_bias_descriptor, get_weights_accessor(data_path, "rnn_lstm_cell_MatMul_bias.npy")) - .set_name("MatMul_3_bias"); + .set_name("MatMul_3_bias"); // LSTM Block - std::pair<SubStream, SubStream> new_state_1 = add_lstm_cell(unstack_nid, 0, previous_state, previous_state, add_y, lstm_fc_weights, lstm_fc_bias); - std::pair<SubStream, SubStream>
new_state_2 = add_lstm_cell(unstack_nid, 1, new_state_1.first, new_state_1.second, add_y, lstm_fc_weights, lstm_fc_bias); - std::pair<SubStream, SubStream> new_state_3 = add_lstm_cell(unstack_nid, 2, new_state_2.first, new_state_2.second, add_y, lstm_fc_weights, lstm_fc_bias); - std::pair<SubStream, SubStream> new_state_4 = add_lstm_cell(unstack_nid, 3, new_state_3.first, new_state_3.second, add_y, lstm_fc_weights, lstm_fc_bias); - std::pair<SubStream, SubStream> new_state_5 = add_lstm_cell(unstack_nid, 4, new_state_4.first, new_state_4.second, add_y, lstm_fc_weights, lstm_fc_bias); - std::pair<SubStream, SubStream> new_state_6 = add_lstm_cell(unstack_nid, 5, new_state_5.first, new_state_5.second, add_y, lstm_fc_weights, lstm_fc_bias); - std::pair<SubStream, SubStream> new_state_7 = add_lstm_cell(unstack_nid, 6, new_state_6.first, new_state_6.second, add_y, lstm_fc_weights, lstm_fc_bias); - std::pair<SubStream, SubStream> new_state_8 = add_lstm_cell(unstack_nid, 7, new_state_7.first, new_state_7.second, add_y, lstm_fc_weights, lstm_fc_bias); - std::pair<SubStream, SubStream> new_state_9 = add_lstm_cell(unstack_nid, 8, new_state_8.first, new_state_8.second, add_y, lstm_fc_weights, lstm_fc_bias); - std::pair<SubStream, SubStream> new_state_10 = add_lstm_cell(unstack_nid, 9, new_state_9.first, new_state_9.second, add_y, lstm_fc_weights, lstm_fc_bias); - std::pair<SubStream, SubStream> new_state_11 = add_lstm_cell(unstack_nid, 10, new_state_10.first, new_state_10.second, add_y, lstm_fc_weights, lstm_fc_bias); - std::pair<SubStream, SubStream> new_state_12 = add_lstm_cell(unstack_nid, 11, new_state_11.first, new_state_11.second, add_y, lstm_fc_weights, lstm_fc_bias); - std::pair<SubStream, SubStream> new_state_13 = add_lstm_cell(unstack_nid, 12, new_state_12.first, new_state_12.second, add_y, lstm_fc_weights, lstm_fc_bias); - std::pair<SubStream, SubStream> new_state_14 = add_lstm_cell(unstack_nid, 13, new_state_13.first, new_state_13.second, add_y, lstm_fc_weights, lstm_fc_bias); - std::pair<SubStream, SubStream> new_state_15 = add_lstm_cell(unstack_nid, 14, new_state_14.first, new_state_14.second, add_y, lstm_fc_weights, lstm_fc_bias); - std::pair<SubStream, SubStream> new_state_16 = add_lstm_cell(unstack_nid, 15, new_state_15.first, new_state_15.second, add_y, lstm_fc_weights, lstm_fc_bias); + std::pair<SubStream, SubStream> new_state_1 = + add_lstm_cell(unstack_nid, 0, previous_state, previous_state, add_y, lstm_fc_weights, lstm_fc_bias); + std::pair<SubStream, SubStream> new_state_2 = + add_lstm_cell(unstack_nid, 1, new_state_1.first, new_state_1.second, add_y, lstm_fc_weights, lstm_fc_bias); + std::pair<SubStream, SubStream> new_state_3 = + add_lstm_cell(unstack_nid, 2, new_state_2.first, new_state_2.second, add_y, lstm_fc_weights, lstm_fc_bias); + std::pair<SubStream, SubStream> new_state_4 = + add_lstm_cell(unstack_nid, 3, new_state_3.first, new_state_3.second, add_y, lstm_fc_weights, lstm_fc_bias); + std::pair<SubStream, SubStream> new_state_5 = + add_lstm_cell(unstack_nid, 4, new_state_4.first, new_state_4.second, add_y, lstm_fc_weights, lstm_fc_bias); + std::pair<SubStream, SubStream> new_state_6 = + add_lstm_cell(unstack_nid, 5, new_state_5.first, new_state_5.second, add_y, lstm_fc_weights, lstm_fc_bias); + std::pair<SubStream, SubStream> new_state_7 = + add_lstm_cell(unstack_nid, 6, new_state_6.first, new_state_6.second, add_y, lstm_fc_weights, lstm_fc_bias); + std::pair<SubStream, SubStream> new_state_8 = + add_lstm_cell(unstack_nid, 7, new_state_7.first, new_state_7.second, 
add_y, lstm_fc_weights, lstm_fc_bias); + std::pair<SubStream, SubStream> new_state_9 = + add_lstm_cell(unstack_nid, 8, new_state_8.first, new_state_8.second, add_y, lstm_fc_weights, lstm_fc_bias); + std::pair<SubStream, SubStream> new_state_10 = + add_lstm_cell(unstack_nid, 9, new_state_9.first, new_state_9.second, add_y, lstm_fc_weights, lstm_fc_bias); + std::pair<SubStream, SubStream> new_state_11 = add_lstm_cell( + unstack_nid, 10, new_state_10.first, new_state_10.second, add_y, lstm_fc_weights, lstm_fc_bias); + std::pair<SubStream, SubStream> new_state_12 = add_lstm_cell( + unstack_nid, 11, new_state_11.first, new_state_11.second, add_y, lstm_fc_weights, lstm_fc_bias); + std::pair<SubStream, SubStream> new_state_13 = add_lstm_cell( + unstack_nid, 12, new_state_12.first, new_state_12.second, add_y, lstm_fc_weights, lstm_fc_bias); + std::pair<SubStream, SubStream> new_state_14 = add_lstm_cell( + unstack_nid, 13, new_state_13.first, new_state_13.second, add_y, lstm_fc_weights, lstm_fc_bias); + std::pair<SubStream, SubStream> new_state_15 = add_lstm_cell( + unstack_nid, 14, new_state_14.first, new_state_14.second, add_y, lstm_fc_weights, lstm_fc_bias); + std::pair<SubStream, SubStream> new_state_16 = add_lstm_cell( + unstack_nid, 15, new_state_15.first, new_state_15.second, add_y, lstm_fc_weights, lstm_fc_bias); // Concatenate new states on height const int axis = 1; - graph << StackLayer(axis, - std::move(new_state_1.second), - std::move(new_state_2.second), - std::move(new_state_3.second), - std::move(new_state_4.second), - std::move(new_state_5.second), - std::move(new_state_6.second), - std::move(new_state_7.second), - std::move(new_state_8.second), - std::move(new_state_9.second), - std::move(new_state_10.second), - std::move(new_state_11.second), - std::move(new_state_12.second), - std::move(new_state_13.second), - std::move(new_state_14.second), - std::move(new_state_15.second), - std::move(new_state_16.second)) - .set_name("concat"); - - graph << FullyConnectedLayer( - 2048U, - get_weights_accessor(data_path, "h5_transpose.npy", weights_layout), - get_weights_accessor(data_path, "MatMul_3_bias.npy")) - .set_name("fc3") + graph << StackLayer(axis, std::move(new_state_1.second), std::move(new_state_2.second), + std::move(new_state_3.second), std::move(new_state_4.second), std::move(new_state_5.second), + std::move(new_state_6.second), std::move(new_state_7.second), std::move(new_state_8.second), + std::move(new_state_9.second), std::move(new_state_10.second), + std::move(new_state_11.second), std::move(new_state_12.second), + std::move(new_state_13.second), std::move(new_state_14.second), + std::move(new_state_15.second), std::move(new_state_16.second)) + .set_name("concat"); + + graph << FullyConnectedLayer(2048U, get_weights_accessor(data_path, "h5_transpose.npy", weights_layout), + get_weights_accessor(data_path, "MatMul_3_bias.npy")) + .set_name("fc3") << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, cell_clip)) - .set_name("Relu3") - << FullyConnectedLayer( - 29U, - get_weights_accessor(data_path, "h6_transpose.npy", weights_layout), - get_weights_accessor(data_path, "MatMul_4_bias.npy")) - .set_name("fc3") + .set_name("Relu3") + << FullyConnectedLayer(29U, get_weights_accessor(data_path, "h6_transpose.npy", weights_layout), + get_weights_accessor(data_path, "MatMul_4_bias.npy")) + .set_name("fc3") << SoftmaxLayer().set_name("logits"); graph << OutputLayer(get_output_accessor(common_params, 5)); // Finalize graph GraphConfig config; - 
config.num_threads = common_params.threads; - config.use_tuner = common_params.enable_tuner; - config.tuner_file = common_params.tuner_file; - config.convert_to_uint8 = (common_params.data_type == DataType::QASYMM8); + config.num_threads = common_params.threads; + config.use_tuner = common_params.enable_tuner; + config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; + config.use_synthetic_type = arm_compute::is_data_type_quantized(common_params.data_type); + config.synthetic_type = common_params.data_type; graph.finalize(common_params.target, config); @@ -239,7 +241,7 @@ private: return Status{}; } - std::pair<SubStream, SubStream> add_lstm_cell(NodeID unstack_nid, + std::pair<SubStream, SubStream> add_lstm_cell(NodeID unstack_nid, unsigned int unstack_idx, SubStream previous_state_c, SubStream previous_state_h, @@ -248,41 +250,41 @@ private: SubStream lstm_fc_bias) { const std::string cell_name("rnn/lstm_cell_" + std::to_string(unstack_idx)); - const DataLayoutDimension concat_dim = (common_params.data_layout == DataLayout::NHWC) ? DataLayoutDimension::CHANNEL : DataLayoutDimension::WIDTH; + const DataLayoutDimension concat_dim = + (common_params.data_layout == DataLayout::NHWC) ? DataLayoutDimension::CHANNEL : DataLayoutDimension::WIDTH; // Concatenate result of Unstack with previous_state_h - NodeParams concat_params = { cell_name + "/concat", graph.hints().target_hint }; + NodeParams concat_params = {cell_name + "/concat", graph.hints().target_hint}; NodeID concat_nid = graph.graph().add_node<ConcatenateLayerNode>(2, concat_dim); graph.graph().add_connection(unstack_nid, unstack_idx, concat_nid, 0); graph.graph().add_connection(previous_state_h.tail_node(), 0, concat_nid, 1); set_node_params(graph.graph(), concat_nid, concat_params); graph.forward_tail(concat_nid); - graph << FullyConnectedLayer( - 8192U, - lstm_fc_weights, - lstm_fc_bias) - .set_name(cell_name + "/BiasAdd"); + graph << FullyConnectedLayer(8192U, lstm_fc_weights, lstm_fc_bias).set_name(cell_name + "/BiasAdd"); // Split Layer const unsigned int num_splits = 4; const unsigned int split_axis = 0; - NodeParams split_params = { cell_name + "/split", graph.hints().target_hint }; - NodeID split_nid = GraphBuilder::add_split_node(graph.graph(), split_params, { graph.tail_node(), 0 }, num_splits, split_axis); + NodeParams split_params = {cell_name + "/split", graph.hints().target_hint}; + NodeID split_nid = + GraphBuilder::add_split_node(graph.graph(), split_params, {graph.tail_node(), 0}, num_splits, split_axis); - NodeParams sigmoid_1_params = { cell_name + "/Sigmoid_1", graph.hints().target_hint }; - NodeParams add_params = { cell_name + "/add", graph.hints().target_hint }; - NodeParams sigmoid_2_params = { cell_name + "/Sigmoid_2", graph.hints().target_hint }; - NodeParams tanh_params = { cell_name + "/Tanh", graph.hints().target_hint }; + NodeParams sigmoid_1_params = {cell_name + "/Sigmoid_1", graph.hints().target_hint}; + NodeParams add_params = {cell_name + "/add", graph.hints().target_hint}; + NodeParams sigmoid_2_params = {cell_name + "/Sigmoid_2", graph.hints().target_hint}; + NodeParams tanh_params = {cell_name + "/Tanh", graph.hints().target_hint}; // Sigmoid 1 (first split) - NodeID sigmoid_1_nid = graph.graph().add_node<ActivationLayerNode>(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)); + NodeID sigmoid_1_nid = graph.graph().add_node<ActivationLayerNode>( + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)); 
graph.graph().add_connection(split_nid, 0, sigmoid_1_nid, 0); set_node_params(graph.graph(), sigmoid_1_nid, sigmoid_1_params); // Tanh (second split) - NodeID tanh_nid = graph.graph().add_node<ActivationLayerNode>(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f)); + NodeID tanh_nid = graph.graph().add_node<ActivationLayerNode>( + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f)); graph.graph().add_connection(split_nid, 1, tanh_nid, 0); set_node_params(graph.graph(), tanh_nid, tanh_params); @@ -290,13 +292,15 @@ private: tanh_ss.forward_tail(tanh_nid); // Add (third split) - NodeID add_nid = graph.graph().add_node<EltwiseLayerNode>(descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add }); + NodeID add_nid = + graph.graph().add_node<EltwiseLayerNode>(descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add}); graph.graph().add_connection(split_nid, 2, add_nid, 0); graph.graph().add_connection(add_y.tail_node(), 0, add_nid, 1); set_node_params(graph.graph(), add_nid, add_params); // Sigmoid 2 (fourth split) - NodeID sigmoid_2_nid = graph.graph().add_node<ActivationLayerNode>(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)); + NodeID sigmoid_2_nid = graph.graph().add_node<ActivationLayerNode>( + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)); graph.graph().add_connection(split_nid, 3, sigmoid_2_nid, 0); set_node_params(graph.graph(), sigmoid_2_nid, sigmoid_2_params); @@ -304,28 +308,28 @@ private: sigmoid_1_ss.forward_tail(sigmoid_1_nid); SubStream mul_1_ss(sigmoid_1_ss); mul_1_ss << EltwiseLayer(std::move(sigmoid_1_ss), std::move(tanh_ss), EltwiseOperation::Mul) - .set_name(cell_name + "/mul_1"); + .set_name(cell_name + "/mul_1"); SubStream tanh_1_ss_tmp(graph); tanh_1_ss_tmp.forward_tail(add_nid); tanh_1_ss_tmp << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)) - .set_name(cell_name + "/Sigmoid"); + .set_name(cell_name + "/Sigmoid"); SubStream tanh_1_ss_tmp2(tanh_1_ss_tmp); tanh_1_ss_tmp2 << EltwiseLayer(std::move(tanh_1_ss_tmp), std::move(previous_state_c), EltwiseOperation::Mul) - .set_name(cell_name + "/mul"); + .set_name(cell_name + "/mul"); SubStream tanh_1_ss(tanh_1_ss_tmp2); tanh_1_ss << EltwiseLayer(std::move(tanh_1_ss_tmp2), std::move(mul_1_ss), EltwiseOperation::Add) - .set_name(cell_name + "/new_state_c"); + .set_name(cell_name + "/new_state_c"); SubStream new_state_c(tanh_1_ss); tanh_1_ss << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f)) - .set_name(cell_name + "/Tanh_1"); + .set_name(cell_name + "/Tanh_1"); SubStream sigmoid_2_ss(graph); sigmoid_2_ss.forward_tail(sigmoid_2_nid); graph << EltwiseLayer(std::move(sigmoid_2_ss), std::move(tanh_1_ss), EltwiseOperation::Mul) - .set_name(cell_name + "/new_state_h"); + .set_name(cell_name + "/new_state_h"); SubStream new_state_h(graph); return std::pair<SubStream, SubStream>(new_state_c, new_state_h); diff --git a/examples/graph_edsr.cpp b/examples/graph_edsr.cpp index 2f2a9fa4d7..b4f2fadf4a 100644 --- a/examples/graph_edsr.cpp +++ b/examples/graph_edsr.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 ARM Limited. + * Copyright (c) 2020-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,28 +22,28 @@ * SOFTWARE. 
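Before moving to the EDSR files, a note on what the reflowed add_lstm_cell() above computes: the 8192-wide fully connected output is split four ways, and the node wiring realizes a standard LSTM cell with a forget-gate bias of 1.0, supplied through the add_y input of ones. Writing the four splits as i, j, f, o in order, the cell is (an editorial summary in LaTeX, not text from the source):

\begin{aligned}
[\,i,\ j,\ f,\ o\,] &= \operatorname{split}_4\!\big(W\,[x_t;\ h_{t-1}] + b\big)\\
c_t &= \sigma(f + 1)\odot c_{t-1} + \sigma(i)\odot\tanh(j)\\
h_t &= \sigma(o)\odot\tanh(c_t)
\end{aligned}

Here \sigma is the logistic sigmoid and \odot is element-wise multiplication; new_state_c and new_state_h in the code are c_t and h_t, and the Add node fed by add_y implements the f + 1 term.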
*/ +#include "graph_edsr.h" + #include "arm_compute/graph/Utils.h" #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/Utils.h" -#include "graph_edsr.h" - using namespace arm_compute::graph; using namespace arm_compute::utils; class GraphEdsrExample : public Example { public: - GraphEdsrExample() - : cmd_parser(), common_opts(cmd_parser), common_params() + GraphEdsrExample() : cmd_parser(), common_opts(cmd_parser), common_params() { expected_output_filename = cmd_parser.add_option<SimpleOption<std::string>>("expected-output-filename", ""); - expected_output_filename->set_help("Name of npy file containing the expected output to validate the graph output."); + expected_output_filename->set_help( + "Name of npy file containing the expected output to validate the graph output."); } - GraphEdsrExample(const GraphEdsrExample &) = delete; + GraphEdsrExample(const GraphEdsrExample &) = delete; GraphEdsrExample &operator=(const GraphEdsrExample &) = delete; ~GraphEdsrExample() override = default; @@ -57,13 +57,14 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; } - ARM_COMPUTE_EXIT_ON_MSG(common_params.data_type != DataType::QASYMM8, "Only QASYMM8 is supported for this graph example"); + ARM_COMPUTE_EXIT_ON_MSG(common_params.data_type != DataType::QASYMM8, + "Only QASYMM8 is supported for this graph example"); // Print parameter values std::cout << common_params << std::endl; @@ -75,6 +76,7 @@ public: config.use_tuner = common_params.enable_tuner; config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; context.set_config(config); @@ -97,11 +99,32 @@ private: GraphContext context{}; GraphManager manager{}; - SimpleOption<std::string> *expected_output_filename{ nullptr }; + SimpleOption<std::string> *expected_output_filename{nullptr}; GraphEdsr model{}; }; +/** Internal implementation of UINT8 EDSR with some modifications from the paper. + * The sub-pixel convolution has been replaced with a deconvolution layer. This + * operation is mathematically the same. + * + * Convolution replaced by deconvolution: + * https://arxiv.org/abs/1609.07009 + * "Is the deconvolution layer the same as a convolutional layer?" + * Wenzhe Shi, Jose Caballero, Lucas Theis, Ferenc Huszar, Andrew Aitken, Christian Ledig, Zehan Wang + * + * Original model is: + * https://arxiv.org/abs/1707.02921 + * "Enhanced Deep Residual Networks for Single Image Super-Resolution" + * Bee Lim, Sanghyun Son, Heewon Kim, Seungjun Nah, Kyoung Mu Lee + * + * @note To list all the possible arguments execute the binary appended with the --help option + * + * @param[in] argc Number of arguments + * @param[in] argv Arguments + * + * @return Return code + */ int main(int argc, char **argv) { return run_example<GraphEdsrExample>(argc, argv); diff --git a/examples/graph_edsr.h b/examples/graph_edsr.h index cb467d0377..1161e4ba38 100644 --- a/examples/graph_edsr.h +++ b/examples/graph_edsr.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 ARM Limited. + * Copyright (c) 2020 Arm Limited. 
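The file-level comment added to graph_edsr.cpp above cites Shi et al. for why replacing the sub-pixel convolution with a deconvolution changes nothing mathematically. Stated informally (an editorial gloss; the kernel-size bookkeeping is in the paper): a convolution producing r^2 C output channels followed by a factor-r depth-to-space shuffle computes the same function as a stride-r transposed convolution with C output channels whose kernel is a fixed rearrangement \Pi of the same weights,

\operatorname{DepthToSpace}_r\big(\operatorname{Conv}_{W}(x)\big) = \operatorname{Deconv}^{\,\mathrm{stride}=r}_{\Pi(W)}(x),

which is why the UINT8 EDSR graph in graph_edsr.h below can use deconvolution nodes directly.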
* * SPDX-License-Identifier: MIT * @@ -32,12 +32,12 @@ class GraphEdsr { public: - GraphEdsr() - : _graph(0, "EDSR") + GraphEdsr() : _graph(0, "EDSR") { } - bool setup(const arm_compute::utils::CommonGraphParams &common_params, const arm_compute::utils::SimpleOption<std::string> &expected_output_filename) + bool setup(const arm_compute::utils::CommonGraphParams &common_params, + const arm_compute::utils::SimpleOption<std::string> &expected_output_filename) { using namespace arm_compute; using namespace arm_compute::graph; @@ -47,1225 +47,879 @@ public: const auto &data_path = common_params.data_path; const auto &target = common_params.target; - NodeID id_upscale_net_FakeQuantWithMinMaxVars_transposed = _graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 12, 2, 2, 3 }, - DataType::QASYMM8, - QuantizationInfo(0.00393533194437623, 1), - DataLayout::NHWC }); - INode *node_upscale_net_FakeQuantWithMinMaxVars_transposed = _graph.node(id_upscale_net_FakeQuantWithMinMaxVars_transposed); - node_upscale_net_FakeQuantWithMinMaxVars_transposed->set_common_node_parameters(NodeParams{ "upscale_net_FakeQuantWithMinMaxVars_transposed", target }); - node_upscale_net_FakeQuantWithMinMaxVars_transposed->output(0)->set_accessor(get_weights_accessor(data_path, - "/cnn_data/edsr_model/upscale_net_FakeQuantWithMinMaxVars_transposed.npy", DataLayout::NHWC)); - - NodeID id_pre_upscale_Conv2D_bias = _graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 12 }, - DataType::S32, - QuantizationInfo(2.9644968435604824e-06), - DataLayout::NHWC }); + NodeID id_upscale_net_FakeQuantWithMinMaxVars_transposed = _graph.add_node<ConstNode>(TensorDescriptor{ + TensorShape{12, 2, 2, 3}, DataType::QASYMM8, QuantizationInfo(0.00393533194437623, 1), DataLayout::NHWC}); + INode *node_upscale_net_FakeQuantWithMinMaxVars_transposed = + _graph.node(id_upscale_net_FakeQuantWithMinMaxVars_transposed); + node_upscale_net_FakeQuantWithMinMaxVars_transposed->set_common_node_parameters( + NodeParams{"upscale_net_FakeQuantWithMinMaxVars_transposed", target}); + node_upscale_net_FakeQuantWithMinMaxVars_transposed->output(0)->set_accessor(get_weights_accessor( + data_path, "/cnn_data/edsr_model/upscale_net_FakeQuantWithMinMaxVars_transposed.npy", DataLayout::NHWC)); + + NodeID id_pre_upscale_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{ + TensorShape{12}, DataType::S32, QuantizationInfo(2.9644968435604824e-06), DataLayout::NHWC}); INode *node_pre_upscale_Conv2D_bias = _graph.node(id_pre_upscale_Conv2D_bias); - node_pre_upscale_Conv2D_bias->set_common_node_parameters(NodeParams{ "pre_upscale_Conv2D_bias", target }); - node_pre_upscale_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/pre_upscale_Conv2D_bias.npy", DataLayout::NHWC)); - - NodeID id_pre_upscale_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 256, 3, 3, 12 }, - DataType::QASYMM8, - QuantizationInfo(0.000455576169770211, 128), - DataLayout::NHWC }); + node_pre_upscale_Conv2D_bias->set_common_node_parameters(NodeParams{"pre_upscale_Conv2D_bias", target}); + node_pre_upscale_Conv2D_bias->output(0)->set_accessor( + get_weights_accessor(data_path, "/cnn_data/edsr_model/pre_upscale_Conv2D_bias.npy", DataLayout::NHWC)); + + NodeID id_pre_upscale_FakeQuantWithMinMaxVars = + _graph.add_node<ConstNode>(TensorDescriptor{TensorShape{256, 3, 3, 12}, DataType::QASYMM8, + QuantizationInfo(0.000455576169770211, 128), DataLayout::NHWC}); INode 
*node_pre_upscale_FakeQuantWithMinMaxVars = _graph.node(id_pre_upscale_FakeQuantWithMinMaxVars); - node_pre_upscale_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "pre_upscale_FakeQuantWithMinMaxVars", target }); - node_pre_upscale_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/pre_upscale_FakeQuantWithMinMaxVars.npy", - DataLayout::NHWC)); - - NodeID id_post_residual_Conv2D_bias = _graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 256 }, - DataType::S32, - QuantizationInfo(1.2760000345224398e-06), - DataLayout::NHWC }); + node_pre_upscale_FakeQuantWithMinMaxVars->set_common_node_parameters( + NodeParams{"pre_upscale_FakeQuantWithMinMaxVars", target}); + node_pre_upscale_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor( + data_path, "/cnn_data/edsr_model/pre_upscale_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC)); + + NodeID id_post_residual_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{ + TensorShape{256}, DataType::S32, QuantizationInfo(1.2760000345224398e-06), DataLayout::NHWC}); INode *node_post_residual_Conv2D_bias = _graph.node(id_post_residual_Conv2D_bias); - node_post_residual_Conv2D_bias->set_common_node_parameters(NodeParams{ "post_residual_Conv2D_bias", target }); - node_post_residual_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/post_residual_Conv2D_bias.npy", DataLayout::NHWC)); + node_post_residual_Conv2D_bias->set_common_node_parameters(NodeParams{"post_residual_Conv2D_bias", target}); + node_post_residual_Conv2D_bias->output(0)->set_accessor( + get_weights_accessor(data_path, "/cnn_data/edsr_model/post_residual_Conv2D_bias.npy", DataLayout::NHWC)); NodeID id_post_residual_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 256, 3, 3, 256 }, - DataType::QASYMM8, - QuantizationInfo(0.00036424631252884865, 129), - DataLayout::NHWC }); + TensorDescriptor{TensorShape{256, 3, 3, 256}, DataType::QASYMM8, + QuantizationInfo(0.00036424631252884865, 129), DataLayout::NHWC}); INode *node_post_residual_FakeQuantWithMinMaxVars = _graph.node(id_post_residual_FakeQuantWithMinMaxVars); - node_post_residual_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "post_residual_FakeQuantWithMinMaxVars", target }); - node_post_residual_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/post_residual_FakeQuantWithMinMaxVars.npy", - DataLayout::NHWC)); - - TensorShape scalar_4d_shape{}; + node_post_residual_FakeQuantWithMinMaxVars->set_common_node_parameters( + NodeParams{"post_residual_FakeQuantWithMinMaxVars", target}); + node_post_residual_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor( + data_path, "/cnn_data/edsr_model/post_residual_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC)); - scalar_4d_shape.set(0, 1, false).set(1, 1, false).set(2, 1, false).set(3, 1, false); - - NodeID id_mul_15_y = _graph.add_node<ConstNode>( - TensorDescriptor - { - scalar_4d_shape, - DataType::QASYMM8, - QuantizationInfo(0.0003921568568330258), - DataLayout::NHWC }); + NodeID id_mul_15_y = _graph.add_node<ConstNode>(TensorDescriptor{ + TensorShape{1}, DataType::QASYMM8, QuantizationInfo(0.0003921568568330258), DataLayout::NHWC}); INode *node_mul_15_y = _graph.node(id_mul_15_y); - node_mul_15_y->set_common_node_parameters(NodeParams{ "mul_15_y", target }); - node_mul_15_y->output(0)->set_accessor(get_weights_accessor(data_path, 
"/cnn_data/edsr_model/mul_15_y.npy", DataLayout::NHWC)); - - NodeID id_block_15_1_Conv2D_bias = _graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 256 }, - DataType::S32, - QuantizationInfo(1.2441644230420934e-06), - DataLayout::NHWC }); + node_mul_15_y->set_common_node_parameters(NodeParams{"mul_15_y", target}); + node_mul_15_y->output(0)->set_accessor( + get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_15_y.npy", DataLayout::NHWC)); + + NodeID id_block_15_1_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{ + TensorShape{256}, DataType::S32, QuantizationInfo(1.2441644230420934e-06), DataLayout::NHWC}); INode *node_block_15_1_Conv2D_bias = _graph.node(id_block_15_1_Conv2D_bias); - node_block_15_1_Conv2D_bias->set_common_node_parameters(NodeParams{ "block_15_1_Conv2D_bias", target }); - node_block_15_1_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_15_1_Conv2D_bias.npy", DataLayout::NHWC)); + node_block_15_1_Conv2D_bias->set_common_node_parameters(NodeParams{"block_15_1_Conv2D_bias", target}); + node_block_15_1_Conv2D_bias->output(0)->set_accessor( + get_weights_accessor(data_path, "/cnn_data/edsr_model/block_15_1_Conv2D_bias.npy", DataLayout::NHWC)); NodeID id_block_15_1_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 256, 3, 3, 256 }, - DataType::QASYMM8, - QuantizationInfo(0.00037038681330159307, 125), - DataLayout::NHWC }); + TensorDescriptor{TensorShape{256, 3, 3, 256}, DataType::QASYMM8, + QuantizationInfo(0.00037038681330159307, 125), DataLayout::NHWC}); INode *node_block_15_1_FakeQuantWithMinMaxVars = _graph.node(id_block_15_1_FakeQuantWithMinMaxVars); - node_block_15_1_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "block_15_1_FakeQuantWithMinMaxVars", target }); - node_block_15_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_15_1_FakeQuantWithMinMaxVars.npy", - DataLayout::NHWC)); - - NodeID id_mul_14_y = _graph.add_node<ConstNode>( - TensorDescriptor - { - scalar_4d_shape, - DataType::QASYMM8, - QuantizationInfo(0.0003921568568330258), - DataLayout::NHWC }); + node_block_15_1_FakeQuantWithMinMaxVars->set_common_node_parameters( + NodeParams{"block_15_1_FakeQuantWithMinMaxVars", target}); + node_block_15_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor( + data_path, "/cnn_data/edsr_model/block_15_1_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC)); + + NodeID id_mul_14_y = _graph.add_node<ConstNode>(TensorDescriptor{ + TensorShape{1}, DataType::QASYMM8, QuantizationInfo(0.0003921568568330258), DataLayout::NHWC}); INode *node_mul_14_y = _graph.node(id_mul_14_y); - node_mul_14_y->set_common_node_parameters(NodeParams{ "mul_14_y", target }); - node_mul_14_y->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_14_y.npy", DataLayout::NHWC)); - - NodeID id_block_14_1_Conv2D_bias = _graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 256 }, - DataType::S32, - QuantizationInfo(1.3417260333881131e-06), - DataLayout::NHWC }); + node_mul_14_y->set_common_node_parameters(NodeParams{"mul_14_y", target}); + node_mul_14_y->output(0)->set_accessor( + get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_14_y.npy", DataLayout::NHWC)); + + NodeID id_block_14_1_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{ + TensorShape{256}, DataType::S32, QuantizationInfo(1.3417260333881131e-06), DataLayout::NHWC}); INode 
*node_block_14_1_Conv2D_bias = _graph.node(id_block_14_1_Conv2D_bias); - node_block_14_1_Conv2D_bias->set_common_node_parameters(NodeParams{ "block_14_1_Conv2D_bias", target }); - node_block_14_1_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_14_1_Conv2D_bias.npy", DataLayout::NHWC)); + node_block_14_1_Conv2D_bias->set_common_node_parameters(NodeParams{"block_14_1_Conv2D_bias", target}); + node_block_14_1_Conv2D_bias->output(0)->set_accessor( + get_weights_accessor(data_path, "/cnn_data/edsr_model/block_14_1_Conv2D_bias.npy", DataLayout::NHWC)); NodeID id_block_14_1_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 256, 3, 3, 256 }, - DataType::QASYMM8, - QuantizationInfo(0.00040307495510205626, 127), - DataLayout::NHWC }); + TensorDescriptor{TensorShape{256, 3, 3, 256}, DataType::QASYMM8, + QuantizationInfo(0.00040307495510205626, 127), DataLayout::NHWC}); INode *node_block_14_1_FakeQuantWithMinMaxVars = _graph.node(id_block_14_1_FakeQuantWithMinMaxVars); - node_block_14_1_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "block_14_1_FakeQuantWithMinMaxVars", target }); - node_block_14_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_14_1_FakeQuantWithMinMaxVars.npy", - DataLayout::NHWC)); - - NodeID id_mul_13_y = _graph.add_node<ConstNode>( - TensorDescriptor - { - scalar_4d_shape, - DataType::QASYMM8, - QuantizationInfo(0.0003921568568330258), - DataLayout::NHWC }); + node_block_14_1_FakeQuantWithMinMaxVars->set_common_node_parameters( + NodeParams{"block_14_1_FakeQuantWithMinMaxVars", target}); + node_block_14_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor( + data_path, "/cnn_data/edsr_model/block_14_1_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC)); + + NodeID id_mul_13_y = _graph.add_node<ConstNode>(TensorDescriptor{ + TensorShape{1}, DataType::QASYMM8, QuantizationInfo(0.0003921568568330258), DataLayout::NHWC}); INode *node_mul_13_y = _graph.node(id_mul_13_y); - node_mul_13_y->set_common_node_parameters(NodeParams{ "mul_13_y", target }); - node_mul_13_y->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_13_y.npy", DataLayout::NHWC)); - - NodeID id_block_13_1_Conv2D_bias = _graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 256 }, - DataType::S32, - QuantizationInfo(1.2636977544389083e-06), - DataLayout::NHWC }); + node_mul_13_y->set_common_node_parameters(NodeParams{"mul_13_y", target}); + node_mul_13_y->output(0)->set_accessor( + get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_13_y.npy", DataLayout::NHWC)); + + NodeID id_block_13_1_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{ + TensorShape{256}, DataType::S32, QuantizationInfo(1.2636977544389083e-06), DataLayout::NHWC}); INode *node_block_13_1_Conv2D_bias = _graph.node(id_block_13_1_Conv2D_bias); - node_block_13_1_Conv2D_bias->set_common_node_parameters(NodeParams{ "block_13_1_Conv2D_bias", target }); - node_block_13_1_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_13_1_Conv2D_bias.npy", DataLayout::NHWC)); + node_block_13_1_Conv2D_bias->set_common_node_parameters(NodeParams{"block_13_1_Conv2D_bias", target}); + node_block_13_1_Conv2D_bias->output(0)->set_accessor( + get_weights_accessor(data_path, "/cnn_data/edsr_model/block_13_1_Conv2D_bias.npy", DataLayout::NHWC)); NodeID id_block_13_1_FakeQuantWithMinMaxVars = 
_graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 256, 3, 3, 256 }, - DataType::QASYMM8, - QuantizationInfo(0.0003858553245663643, 131), - DataLayout::NHWC }); + TensorDescriptor{TensorShape{256, 3, 3, 256}, DataType::QASYMM8, + QuantizationInfo(0.0003858553245663643, 131), DataLayout::NHWC}); INode *node_block_13_1_FakeQuantWithMinMaxVars = _graph.node(id_block_13_1_FakeQuantWithMinMaxVars); - node_block_13_1_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "block_13_1_FakeQuantWithMinMaxVars", target }); - node_block_13_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_13_1_FakeQuantWithMinMaxVars.npy", - DataLayout::NHWC)); - - NodeID id_mul_12_y = _graph.add_node<ConstNode>( - TensorDescriptor - { - scalar_4d_shape, - DataType::QASYMM8, - QuantizationInfo(0.0003921568568330258), - DataLayout::NHWC }); + node_block_13_1_FakeQuantWithMinMaxVars->set_common_node_parameters( + NodeParams{"block_13_1_FakeQuantWithMinMaxVars", target}); + node_block_13_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor( + data_path, "/cnn_data/edsr_model/block_13_1_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC)); + + NodeID id_mul_12_y = _graph.add_node<ConstNode>(TensorDescriptor{ + TensorShape{1}, DataType::QASYMM8, QuantizationInfo(0.0003921568568330258), DataLayout::NHWC}); INode *node_mul_12_y = _graph.node(id_mul_12_y); - node_mul_12_y->set_common_node_parameters(NodeParams{ "mul_12_y", target }); - node_mul_12_y->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_12_y.npy", DataLayout::NHWC)); - - NodeID id_block_12_1_Conv2D_bias = _graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 256 }, - DataType::S32, - QuantizationInfo(1.3479783547154511e-06), - DataLayout::NHWC }); + node_mul_12_y->set_common_node_parameters(NodeParams{"mul_12_y", target}); + node_mul_12_y->output(0)->set_accessor( + get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_12_y.npy", DataLayout::NHWC)); + + NodeID id_block_12_1_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{ + TensorShape{256}, DataType::S32, QuantizationInfo(1.3479783547154511e-06), DataLayout::NHWC}); INode *node_block_12_1_Conv2D_bias = _graph.node(id_block_12_1_Conv2D_bias); - node_block_12_1_Conv2D_bias->set_common_node_parameters(NodeParams{ "block_12_1_Conv2D_bias", target }); - node_block_12_1_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_12_1_Conv2D_bias.npy", DataLayout::NHWC)); + node_block_12_1_Conv2D_bias->set_common_node_parameters(NodeParams{"block_12_1_Conv2D_bias", target}); + node_block_12_1_Conv2D_bias->output(0)->set_accessor( + get_weights_accessor(data_path, "/cnn_data/edsr_model/block_12_1_Conv2D_bias.npy", DataLayout::NHWC)); NodeID id_block_12_1_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 256, 3, 3, 256 }, - DataType::QASYMM8, - QuantizationInfo(0.00041212860378436744, 130), - DataLayout::NHWC }); + TensorDescriptor{TensorShape{256, 3, 3, 256}, DataType::QASYMM8, + QuantizationInfo(0.00041212860378436744, 130), DataLayout::NHWC}); INode *node_block_12_1_FakeQuantWithMinMaxVars = _graph.node(id_block_12_1_FakeQuantWithMinMaxVars); - node_block_12_1_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "block_12_1_FakeQuantWithMinMaxVars", target }); - node_block_12_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, 
"/cnn_data/edsr_model/block_12_1_FakeQuantWithMinMaxVars.npy", - DataLayout::NHWC)); - - NodeID id_mul_11_y = _graph.add_node<ConstNode>( - TensorDescriptor - { - scalar_4d_shape, - DataType::QASYMM8, - QuantizationInfo(0.0003921568568330258), - DataLayout::NHWC }); + node_block_12_1_FakeQuantWithMinMaxVars->set_common_node_parameters( + NodeParams{"block_12_1_FakeQuantWithMinMaxVars", target}); + node_block_12_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor( + data_path, "/cnn_data/edsr_model/block_12_1_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC)); + + NodeID id_mul_11_y = _graph.add_node<ConstNode>(TensorDescriptor{ + TensorShape{1}, DataType::QASYMM8, QuantizationInfo(0.0003921568568330258), DataLayout::NHWC}); INode *node_mul_11_y = _graph.node(id_mul_11_y); - node_mul_11_y->set_common_node_parameters(NodeParams{ "mul_11_y", target }); - node_mul_11_y->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_11_y.npy", DataLayout::NHWC)); - - NodeID id_block_11_1_Conv2D_bias = _graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 256 }, - DataType::S32, - QuantizationInfo(1.2847248171965475e-06), - DataLayout::NHWC }); + node_mul_11_y->set_common_node_parameters(NodeParams{"mul_11_y", target}); + node_mul_11_y->output(0)->set_accessor( + get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_11_y.npy", DataLayout::NHWC)); + + NodeID id_block_11_1_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{ + TensorShape{256}, DataType::S32, QuantizationInfo(1.2847248171965475e-06), DataLayout::NHWC}); INode *node_block_11_1_Conv2D_bias = _graph.node(id_block_11_1_Conv2D_bias); - node_block_11_1_Conv2D_bias->set_common_node_parameters(NodeParams{ "block_11_1_Conv2D_bias", target }); - node_block_11_1_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_11_1_Conv2D_bias.npy", DataLayout::NHWC)); + node_block_11_1_Conv2D_bias->set_common_node_parameters(NodeParams{"block_11_1_Conv2D_bias", target}); + node_block_11_1_Conv2D_bias->output(0)->set_accessor( + get_weights_accessor(data_path, "/cnn_data/edsr_model/block_11_1_Conv2D_bias.npy", DataLayout::NHWC)); NodeID id_block_11_1_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 256, 3, 3, 256 }, - DataType::QASYMM8, - QuantizationInfo(0.00040296532097272575, 131), - DataLayout::NHWC }); + TensorDescriptor{TensorShape{256, 3, 3, 256}, DataType::QASYMM8, + QuantizationInfo(0.00040296532097272575, 131), DataLayout::NHWC}); INode *node_block_11_1_FakeQuantWithMinMaxVars = _graph.node(id_block_11_1_FakeQuantWithMinMaxVars); - node_block_11_1_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "block_11_1_FakeQuantWithMinMaxVars", target }); - node_block_11_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_11_1_FakeQuantWithMinMaxVars.npy", - DataLayout::NHWC)); - - NodeID id_mul_10_y = _graph.add_node<ConstNode>( - TensorDescriptor - { - scalar_4d_shape, - DataType::QASYMM8, - QuantizationInfo(0.0003921568568330258), - DataLayout::NHWC }); + node_block_11_1_FakeQuantWithMinMaxVars->set_common_node_parameters( + NodeParams{"block_11_1_FakeQuantWithMinMaxVars", target}); + node_block_11_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor( + data_path, "/cnn_data/edsr_model/block_11_1_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC)); + + NodeID id_mul_10_y = _graph.add_node<ConstNode>(TensorDescriptor{ + 
TensorShape{1}, DataType::QASYMM8, QuantizationInfo(0.0003921568568330258), DataLayout::NHWC}); INode *node_mul_10_y = _graph.node(id_mul_10_y); - node_mul_10_y->set_common_node_parameters(NodeParams{ "mul_10_y", target }); - node_mul_10_y->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_10_y.npy", DataLayout::NHWC)); - - NodeID id_block_10_1_Conv2D_bias = _graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 256 }, - DataType::S32, - QuantizationInfo(1.1997129831797793e-06), - DataLayout::NHWC }); + node_mul_10_y->set_common_node_parameters(NodeParams{"mul_10_y", target}); + node_mul_10_y->output(0)->set_accessor( + get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_10_y.npy", DataLayout::NHWC)); + + NodeID id_block_10_1_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{ + TensorShape{256}, DataType::S32, QuantizationInfo(1.1997129831797793e-06), DataLayout::NHWC}); INode *node_block_10_1_Conv2D_bias = _graph.node(id_block_10_1_Conv2D_bias); - node_block_10_1_Conv2D_bias->set_common_node_parameters(NodeParams{ "block_10_1_Conv2D_bias", target }); - node_block_10_1_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_10_1_Conv2D_bias.npy", DataLayout::NHWC)); + node_block_10_1_Conv2D_bias->set_common_node_parameters(NodeParams{"block_10_1_Conv2D_bias", target}); + node_block_10_1_Conv2D_bias->output(0)->set_accessor( + get_weights_accessor(data_path, "/cnn_data/edsr_model/block_10_1_Conv2D_bias.npy", DataLayout::NHWC)); NodeID id_block_10_1_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 256, 3, 3, 256 }, - DataType::QASYMM8, - QuantizationInfo(0.00036640543839894235, 129), - DataLayout::NHWC }); + TensorDescriptor{TensorShape{256, 3, 3, 256}, DataType::QASYMM8, + QuantizationInfo(0.00036640543839894235, 129), DataLayout::NHWC}); INode *node_block_10_1_FakeQuantWithMinMaxVars = _graph.node(id_block_10_1_FakeQuantWithMinMaxVars); - node_block_10_1_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "block_10_1_FakeQuantWithMinMaxVars", target }); - node_block_10_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_10_1_FakeQuantWithMinMaxVars.npy", - DataLayout::NHWC)); - - NodeID id_mul_9_y = _graph.add_node<ConstNode>( - TensorDescriptor - { - scalar_4d_shape, - DataType::QASYMM8, - QuantizationInfo(0.0003921568568330258), - DataLayout::NHWC }); + node_block_10_1_FakeQuantWithMinMaxVars->set_common_node_parameters( + NodeParams{"block_10_1_FakeQuantWithMinMaxVars", target}); + node_block_10_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor( + data_path, "/cnn_data/edsr_model/block_10_1_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC)); + + NodeID id_mul_9_y = _graph.add_node<ConstNode>(TensorDescriptor{ + TensorShape{1}, DataType::QASYMM8, QuantizationInfo(0.0003921568568330258), DataLayout::NHWC}); INode *node_mul_9_y = _graph.node(id_mul_9_y); - node_mul_9_y->set_common_node_parameters(NodeParams{ "mul_9_y", target }); - node_mul_9_y->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_9_y.npy", DataLayout::NHWC)); - - NodeID id_block_9_1_Conv2D_bias = _graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 256 }, - DataType::S32, - QuantizationInfo(1.1920226370421005e-06), - DataLayout::NHWC }); + node_mul_9_y->set_common_node_parameters(NodeParams{"mul_9_y", target}); + node_mul_9_y->output(0)->set_accessor( + 
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_9_y.npy", DataLayout::NHWC));
+
+    NodeID id_block_9_1_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{256}, DataType::S32, QuantizationInfo(1.1920226370421005e-06), DataLayout::NHWC});
     INode *node_block_9_1_Conv2D_bias = _graph.node(id_block_9_1_Conv2D_bias);
-    node_block_9_1_Conv2D_bias->set_common_node_parameters(NodeParams{ "block_9_1_Conv2D_bias", target });
-    node_block_9_1_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_9_1_Conv2D_bias.npy", DataLayout::NHWC));
+    node_block_9_1_Conv2D_bias->set_common_node_parameters(NodeParams{"block_9_1_Conv2D_bias", target});
+    node_block_9_1_Conv2D_bias->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/block_9_1_Conv2D_bias.npy", DataLayout::NHWC));
 
     NodeID id_block_9_1_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256, 3, 3, 256 },
-            DataType::QASYMM8,
-            QuantizationInfo(0.0003706997958943248, 129),
-            DataLayout::NHWC });
+        TensorDescriptor{TensorShape{256, 3, 3, 256}, DataType::QASYMM8,
+                         QuantizationInfo(0.0003706997958943248, 129), DataLayout::NHWC});
     INode *node_block_9_1_FakeQuantWithMinMaxVars = _graph.node(id_block_9_1_FakeQuantWithMinMaxVars);
-    node_block_9_1_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "block_9_1_FakeQuantWithMinMaxVars", target });
-    node_block_9_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_9_1_FakeQuantWithMinMaxVars.npy",
-                                                                                         DataLayout::NHWC));
-
-    NodeID id_mul_8_y = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            scalar_4d_shape,
-            DataType::QASYMM8,
-            QuantizationInfo(0.0003921568568330258),
-            DataLayout::NHWC });
+    node_block_9_1_FakeQuantWithMinMaxVars->set_common_node_parameters(
+        NodeParams{"block_9_1_FakeQuantWithMinMaxVars", target});
+    node_block_9_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(
+        data_path, "/cnn_data/edsr_model/block_9_1_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC));
+
+    NodeID id_mul_8_y = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{1}, DataType::QASYMM8, QuantizationInfo(0.0003921568568330258), DataLayout::NHWC});
     INode *node_mul_8_y = _graph.node(id_mul_8_y);
-    node_mul_8_y->set_common_node_parameters(NodeParams{ "mul_8_y", target });
-    node_mul_8_y->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_8_y.npy", DataLayout::NHWC));
-
-    NodeID id_block_8_1_Conv2D_bias = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256 },
-            DataType::S32,
-            QuantizationInfo(1.218903321387188e-06),
-            DataLayout::NHWC });
+    node_mul_8_y->set_common_node_parameters(NodeParams{"mul_8_y", target});
+    node_mul_8_y->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_8_y.npy", DataLayout::NHWC));
+
+    NodeID id_block_8_1_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{256}, DataType::S32, QuantizationInfo(1.218903321387188e-06), DataLayout::NHWC});
     INode *node_block_8_1_Conv2D_bias = _graph.node(id_block_8_1_Conv2D_bias);
-    node_block_8_1_Conv2D_bias->set_common_node_parameters(NodeParams{ "block_8_1_Conv2D_bias", target });
-    node_block_8_1_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_8_1_Conv2D_bias.npy", DataLayout::NHWC));
+    node_block_8_1_Conv2D_bias->set_common_node_parameters(NodeParams{"block_8_1_Conv2D_bias", target});
+    node_block_8_1_Conv2D_bias->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/block_8_1_Conv2D_bias.npy", DataLayout::NHWC));
 
     NodeID id_block_8_1_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256, 3, 3, 256 },
-            DataType::QASYMM8,
-            QuantizationInfo(0.00038377835880964994, 127),
-            DataLayout::NHWC });
+        TensorDescriptor{TensorShape{256, 3, 3, 256}, DataType::QASYMM8,
+                         QuantizationInfo(0.00038377835880964994, 127), DataLayout::NHWC});
     INode *node_block_8_1_FakeQuantWithMinMaxVars = _graph.node(id_block_8_1_FakeQuantWithMinMaxVars);
-    node_block_8_1_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "block_8_1_FakeQuantWithMinMaxVars", target });
-    node_block_8_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_8_1_FakeQuantWithMinMaxVars.npy",
-                                                                                         DataLayout::NHWC));
-
-    NodeID id_mul_7_y = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            scalar_4d_shape,
-            DataType::QASYMM8,
-            QuantizationInfo(0.0003921568568330258),
-            DataLayout::NHWC });
+    node_block_8_1_FakeQuantWithMinMaxVars->set_common_node_parameters(
+        NodeParams{"block_8_1_FakeQuantWithMinMaxVars", target});
+    node_block_8_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(
+        data_path, "/cnn_data/edsr_model/block_8_1_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC));
+
+    NodeID id_mul_7_y = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{1}, DataType::QASYMM8, QuantizationInfo(0.0003921568568330258), DataLayout::NHWC});
     INode *node_mul_7_y = _graph.node(id_mul_7_y);
-    node_mul_7_y->set_common_node_parameters(NodeParams{ "mul_7_y", target });
-    node_mul_7_y->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_7_y.npy", DataLayout::NHWC));
-
-    NodeID id_block_7_1_Conv2D_bias = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256 },
-            DataType::S32,
-            QuantizationInfo(1.257252392861119e-06),
-            DataLayout::NHWC });
+    node_mul_7_y->set_common_node_parameters(NodeParams{"mul_7_y", target});
+    node_mul_7_y->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_7_y.npy", DataLayout::NHWC));
+
+    NodeID id_block_7_1_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{256}, DataType::S32, QuantizationInfo(1.257252392861119e-06), DataLayout::NHWC});
     INode *node_block_7_1_Conv2D_bias = _graph.node(id_block_7_1_Conv2D_bias);
-    node_block_7_1_Conv2D_bias->set_common_node_parameters(NodeParams{ "block_7_1_Conv2D_bias", target });
-    node_block_7_1_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_7_1_Conv2D_bias.npy", DataLayout::NHWC));
+    node_block_7_1_Conv2D_bias->set_common_node_parameters(NodeParams{"block_7_1_Conv2D_bias", target});
+    node_block_7_1_Conv2D_bias->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/block_7_1_Conv2D_bias.npy", DataLayout::NHWC));
 
     NodeID id_block_7_1_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256, 3, 3, 256 },
-            DataType::QASYMM8,
-            QuantizationInfo(0.00039844686398282647, 129),
-            DataLayout::NHWC });
+        TensorDescriptor{TensorShape{256, 3, 3, 256}, DataType::QASYMM8,
+                         QuantizationInfo(0.00039844686398282647, 129), DataLayout::NHWC});
     INode *node_block_7_1_FakeQuantWithMinMaxVars = _graph.node(id_block_7_1_FakeQuantWithMinMaxVars);
-    node_block_7_1_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "block_7_1_FakeQuantWithMinMaxVars", target });
-    node_block_7_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_7_1_FakeQuantWithMinMaxVars.npy",
-                                                                                         DataLayout::NHWC));
-
-    NodeID id_mul_6_y = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            scalar_4d_shape,
-            DataType::QASYMM8,
-            QuantizationInfo(0.0003921568568330258),
-            DataLayout::NHWC });
+    node_block_7_1_FakeQuantWithMinMaxVars->set_common_node_parameters(
+        NodeParams{"block_7_1_FakeQuantWithMinMaxVars", target});
+    node_block_7_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(
+        data_path, "/cnn_data/edsr_model/block_7_1_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC));
+
+    NodeID id_mul_6_y = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{1}, DataType::QASYMM8, QuantizationInfo(0.0003921568568330258), DataLayout::NHWC});
     INode *node_mul_6_y = _graph.node(id_mul_6_y);
-    node_mul_6_y->set_common_node_parameters(NodeParams{ "mul_6_y", target });
-    node_mul_6_y->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_6_y.npy", DataLayout::NHWC));
-
-    NodeID id_block_6_1_Conv2D_bias = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256 },
-            DataType::S32,
-            QuantizationInfo(1.244850636794581e-06),
-            DataLayout::NHWC });
+    node_mul_6_y->set_common_node_parameters(NodeParams{"mul_6_y", target});
+    node_mul_6_y->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_6_y.npy", DataLayout::NHWC));
+
+    NodeID id_block_6_1_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{256}, DataType::S32, QuantizationInfo(1.244850636794581e-06), DataLayout::NHWC});
     INode *node_block_6_1_Conv2D_bias = _graph.node(id_block_6_1_Conv2D_bias);
-    node_block_6_1_Conv2D_bias->set_common_node_parameters(NodeParams{ "block_6_1_Conv2D_bias", target });
-    node_block_6_1_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_6_1_Conv2D_bias.npy", DataLayout::NHWC));
+    node_block_6_1_Conv2D_bias->set_common_node_parameters(NodeParams{"block_6_1_Conv2D_bias", target});
+    node_block_6_1_Conv2D_bias->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/block_6_1_Conv2D_bias.npy", DataLayout::NHWC));
 
     NodeID id_block_6_1_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256, 3, 3, 256 },
-            DataType::QASYMM8,
-            QuantizationInfo(0.00040187727427110076, 132),
-            DataLayout::NHWC });
+        TensorDescriptor{TensorShape{256, 3, 3, 256}, DataType::QASYMM8,
+                         QuantizationInfo(0.00040187727427110076, 132), DataLayout::NHWC});
     INode *node_block_6_1_FakeQuantWithMinMaxVars = _graph.node(id_block_6_1_FakeQuantWithMinMaxVars);
-    node_block_6_1_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "block_6_1_FakeQuantWithMinMaxVars", target });
-    node_block_6_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_6_1_FakeQuantWithMinMaxVars.npy",
-                                                                                         DataLayout::NHWC));
-
-    NodeID id_mul_5_y = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            scalar_4d_shape,
-            DataType::QASYMM8,
-            QuantizationInfo(0.0003921568568330258),
-            DataLayout::NHWC });
+    node_block_6_1_FakeQuantWithMinMaxVars->set_common_node_parameters(
+        NodeParams{"block_6_1_FakeQuantWithMinMaxVars", target});
+    node_block_6_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(
+        data_path, "/cnn_data/edsr_model/block_6_1_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC));
+
+    NodeID id_mul_5_y = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{1}, DataType::QASYMM8, QuantizationInfo(0.0003921568568330258), DataLayout::NHWC});
     INode *node_mul_5_y = _graph.node(id_mul_5_y);
-    node_mul_5_y->set_common_node_parameters(NodeParams{ "mul_5_y", target });
-    node_mul_5_y->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_5_y.npy", DataLayout::NHWC));
-
-    NodeID id_block_5_1_Conv2D_bias = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256 },
-            DataType::S32,
-            QuantizationInfo(1.241092718373693e-06),
-            DataLayout::NHWC });
+    node_mul_5_y->set_common_node_parameters(NodeParams{"mul_5_y", target});
+    node_mul_5_y->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_5_y.npy", DataLayout::NHWC));
+
+    NodeID id_block_5_1_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{256}, DataType::S32, QuantizationInfo(1.241092718373693e-06), DataLayout::NHWC});
     INode *node_block_5_1_Conv2D_bias = _graph.node(id_block_5_1_Conv2D_bias);
-    node_block_5_1_Conv2D_bias->set_common_node_parameters(NodeParams{ "block_5_1_Conv2D_bias", target });
-    node_block_5_1_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_5_1_Conv2D_bias.npy", DataLayout::NHWC));
+    node_block_5_1_Conv2D_bias->set_common_node_parameters(NodeParams{"block_5_1_Conv2D_bias", target});
+    node_block_5_1_Conv2D_bias->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/block_5_1_Conv2D_bias.npy", DataLayout::NHWC));
 
     NodeID id_block_5_1_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256, 3, 3, 256 },
-            DataType::QASYMM8,
-            QuantizationInfo(0.0003938926674891263, 129),
-            DataLayout::NHWC });
+        TensorDescriptor{TensorShape{256, 3, 3, 256}, DataType::QASYMM8,
+                         QuantizationInfo(0.0003938926674891263, 129), DataLayout::NHWC});
     INode *node_block_5_1_FakeQuantWithMinMaxVars = _graph.node(id_block_5_1_FakeQuantWithMinMaxVars);
-    node_block_5_1_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "block_5_1_FakeQuantWithMinMaxVars", target });
-    node_block_5_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_5_1_FakeQuantWithMinMaxVars.npy",
-                                                                                         DataLayout::NHWC));
-
-    NodeID id_mul_4_y = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            scalar_4d_shape,
-            DataType::QASYMM8,
-            QuantizationInfo(0.0003921568568330258),
-            DataLayout::NHWC });
+    node_block_5_1_FakeQuantWithMinMaxVars->set_common_node_parameters(
+        NodeParams{"block_5_1_FakeQuantWithMinMaxVars", target});
+    node_block_5_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(
+        data_path, "/cnn_data/edsr_model/block_5_1_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC));
+
+    NodeID id_mul_4_y = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{1}, DataType::QASYMM8, QuantizationInfo(0.0003921568568330258), DataLayout::NHWC});
     INode *node_mul_4_y = _graph.node(id_mul_4_y);
-    node_mul_4_y->set_common_node_parameters(NodeParams{ "mul_4_y", target });
-    node_mul_4_y->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_4_y.npy", DataLayout::NHWC));
-
-    NodeID id_block_4_1_Conv2D_bias = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256 },
-            DataType::S32,
-            QuantizationInfo(1.1748390988941537e-06),
-            DataLayout::NHWC });
+    node_mul_4_y->set_common_node_parameters(NodeParams{"mul_4_y", target});
+    node_mul_4_y->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_4_y.npy", DataLayout::NHWC));
+
+    NodeID id_block_4_1_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{256}, DataType::S32, QuantizationInfo(1.1748390988941537e-06), DataLayout::NHWC});
     INode *node_block_4_1_Conv2D_bias = _graph.node(id_block_4_1_Conv2D_bias);
-    node_block_4_1_Conv2D_bias->set_common_node_parameters(NodeParams{ "block_4_1_Conv2D_bias", target });
-    node_block_4_1_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_4_1_Conv2D_bias.npy", DataLayout::NHWC));
+    node_block_4_1_Conv2D_bias->set_common_node_parameters(NodeParams{"block_4_1_Conv2D_bias", target});
+    node_block_4_1_Conv2D_bias->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/block_4_1_Conv2D_bias.npy", DataLayout::NHWC));
 
     NodeID id_block_4_1_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256, 3, 3, 256 },
-            DataType::QASYMM8,
-            QuantizationInfo(0.0003788181929849088, 129),
-            DataLayout::NHWC });
+        TensorDescriptor{TensorShape{256, 3, 3, 256}, DataType::QASYMM8,
+                         QuantizationInfo(0.0003788181929849088, 129), DataLayout::NHWC});
     INode *node_block_4_1_FakeQuantWithMinMaxVars = _graph.node(id_block_4_1_FakeQuantWithMinMaxVars);
-    node_block_4_1_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "block_4_1_FakeQuantWithMinMaxVars", target });
-    node_block_4_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_4_1_FakeQuantWithMinMaxVars.npy",
-                                                                                         DataLayout::NHWC));
-
-    NodeID id_mul_3_y = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            scalar_4d_shape,
-            DataType::QASYMM8,
-            QuantizationInfo(0.0003921568568330258),
-            DataLayout::NHWC });
+    node_block_4_1_FakeQuantWithMinMaxVars->set_common_node_parameters(
+        NodeParams{"block_4_1_FakeQuantWithMinMaxVars", target});
+    node_block_4_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(
+        data_path, "/cnn_data/edsr_model/block_4_1_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC));
+
+    NodeID id_mul_3_y = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{1}, DataType::QASYMM8, QuantizationInfo(0.0003921568568330258), DataLayout::NHWC});
     INode *node_mul_3_y = _graph.node(id_mul_3_y);
-    node_mul_3_y->set_common_node_parameters(NodeParams{ "mul_3_y", target });
-    node_mul_3_y->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_3_y.npy", DataLayout::NHWC));
-
-    NodeID id_block_3_1_Conv2D_bias = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256 },
-            DataType::S32,
-            QuantizationInfo(1.1937011095142225e-06),
-            DataLayout::NHWC });
+    node_mul_3_y->set_common_node_parameters(NodeParams{"mul_3_y", target});
+    node_mul_3_y->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_3_y.npy", DataLayout::NHWC));
+
+    NodeID id_block_3_1_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{256}, DataType::S32, QuantizationInfo(1.1937011095142225e-06), DataLayout::NHWC});
     INode *node_block_3_1_Conv2D_bias = _graph.node(id_block_3_1_Conv2D_bias);
-    node_block_3_1_Conv2D_bias->set_common_node_parameters(NodeParams{ "block_3_1_Conv2D_bias", target });
-    node_block_3_1_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_3_1_Conv2D_bias.npy", DataLayout::NHWC));
+    node_block_3_1_Conv2D_bias->set_common_node_parameters(NodeParams{"block_3_1_Conv2D_bias", target});
+    node_block_3_1_Conv2D_bias->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/block_3_1_Conv2D_bias.npy", DataLayout::NHWC));
 
     NodeID id_block_3_1_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256, 3, 3, 256 },
-            DataType::QASYMM8,
-            QuantizationInfo(0.0003944312920793891, 129),
-            DataLayout::NHWC });
+        TensorDescriptor{TensorShape{256, 3, 3, 256}, DataType::QASYMM8,
+                         QuantizationInfo(0.0003944312920793891, 129), DataLayout::NHWC});
     INode *node_block_3_1_FakeQuantWithMinMaxVars = _graph.node(id_block_3_1_FakeQuantWithMinMaxVars);
-    node_block_3_1_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "block_3_1_FakeQuantWithMinMaxVars", target });
-    node_block_3_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_3_1_FakeQuantWithMinMaxVars.npy",
-                                                                                         DataLayout::NHWC));
-
-    NodeID id_mul_2_y = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            scalar_4d_shape,
-            DataType::QASYMM8,
-            QuantizationInfo(0.0003921568568330258),
-            DataLayout::NHWC });
+    node_block_3_1_FakeQuantWithMinMaxVars->set_common_node_parameters(
+        NodeParams{"block_3_1_FakeQuantWithMinMaxVars", target});
+    node_block_3_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(
+        data_path, "/cnn_data/edsr_model/block_3_1_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC));
+
+    NodeID id_mul_2_y = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{1}, DataType::QASYMM8, QuantizationInfo(0.0003921568568330258), DataLayout::NHWC});
     INode *node_mul_2_y = _graph.node(id_mul_2_y);
-    node_mul_2_y->set_common_node_parameters(NodeParams{ "mul_2_y", target });
-    node_mul_2_y->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_2_y.npy", DataLayout::NHWC));
-
-    NodeID id_block_2_1_Conv2D_bias = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256 },
-            DataType::S32,
-            QuantizationInfo(1.1634580232566805e-06),
-            DataLayout::NHWC });
+    node_mul_2_y->set_common_node_parameters(NodeParams{"mul_2_y", target});
+    node_mul_2_y->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_2_y.npy", DataLayout::NHWC));
+
+    NodeID id_block_2_1_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{256}, DataType::S32, QuantizationInfo(1.1634580232566805e-06), DataLayout::NHWC});
     INode *node_block_2_1_Conv2D_bias = _graph.node(id_block_2_1_Conv2D_bias);
-    node_block_2_1_Conv2D_bias->set_common_node_parameters(NodeParams{ "block_2_1_Conv2D_bias", target });
-    node_block_2_1_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_2_1_Conv2D_bias.npy", DataLayout::NHWC));
+    node_block_2_1_Conv2D_bias->set_common_node_parameters(NodeParams{"block_2_1_Conv2D_bias", target});
+    node_block_2_1_Conv2D_bias->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/block_2_1_Conv2D_bias.npy", DataLayout::NHWC));
 
     NodeID id_block_2_1_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256, 3, 3, 256 },
-            DataType::QASYMM8,
-            QuantizationInfo(0.0003789655165746808, 132),
-            DataLayout::NHWC });
+        TensorDescriptor{TensorShape{256, 3, 3, 256}, DataType::QASYMM8,
+                         QuantizationInfo(0.0003789655165746808, 132), DataLayout::NHWC});
     INode *node_block_2_1_FakeQuantWithMinMaxVars = _graph.node(id_block_2_1_FakeQuantWithMinMaxVars);
-    node_block_2_1_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "block_2_1_FakeQuantWithMinMaxVars", target });
-    node_block_2_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_2_1_FakeQuantWithMinMaxVars.npy",
-                                                                                         DataLayout::NHWC));
-
-    NodeID id_mul_1_y = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            scalar_4d_shape,
-            DataType::QASYMM8,
-            QuantizationInfo(0.0003921568568330258),
-            DataLayout::NHWC });
+    node_block_2_1_FakeQuantWithMinMaxVars->set_common_node_parameters(
+        NodeParams{"block_2_1_FakeQuantWithMinMaxVars", target});
+    node_block_2_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(
+        data_path, "/cnn_data/edsr_model/block_2_1_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC));
+
+    NodeID id_mul_1_y = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{1}, DataType::QASYMM8, QuantizationInfo(0.0003921568568330258), DataLayout::NHWC});
     INode *node_mul_1_y = _graph.node(id_mul_1_y);
-    node_mul_1_y->set_common_node_parameters(NodeParams{ "mul_1_y", target });
-    node_mul_1_y->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_1_y.npy", DataLayout::NHWC));
-
-    NodeID id_block_1_1_Conv2D_bias = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256 },
-            DataType::S32,
-            QuantizationInfo(1.197920255435747e-06),
-            DataLayout::NHWC });
+    node_mul_1_y->set_common_node_parameters(NodeParams{"mul_1_y", target});
+    node_mul_1_y->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_1_y.npy", DataLayout::NHWC));
+
+    NodeID id_block_1_1_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{256}, DataType::S32, QuantizationInfo(1.197920255435747e-06), DataLayout::NHWC});
     INode *node_block_1_1_Conv2D_bias = _graph.node(id_block_1_1_Conv2D_bias);
-    node_block_1_1_Conv2D_bias->set_common_node_parameters(NodeParams{ "block_1_1_Conv2D_bias", target });
-    node_block_1_1_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_1_1_Conv2D_bias.npy", DataLayout::NHWC));
+    node_block_1_1_Conv2D_bias->set_common_node_parameters(NodeParams{"block_1_1_Conv2D_bias", target});
+    node_block_1_1_Conv2D_bias->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/block_1_1_Conv2D_bias.npy", DataLayout::NHWC));
 
     NodeID id_block_1_1_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256, 3, 3, 256 },
-            DataType::QASYMM8,
-            QuantizationInfo(0.00038527738070115447, 132),
-            DataLayout::NHWC });
+        TensorDescriptor{TensorShape{256, 3, 3, 256}, DataType::QASYMM8,
+                         QuantizationInfo(0.00038527738070115447, 132), DataLayout::NHWC});
     INode *node_block_1_1_FakeQuantWithMinMaxVars = _graph.node(id_block_1_1_FakeQuantWithMinMaxVars);
-    node_block_1_1_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "block_1_1_FakeQuantWithMinMaxVars", target });
-    node_block_1_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_1_1_FakeQuantWithMinMaxVars.npy",
-                                                                                         DataLayout::NHWC));
-
-    NodeID id_mul_y = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            scalar_4d_shape,
-            DataType::QASYMM8,
-            QuantizationInfo(0.0003921568568330258),
-            DataLayout::NHWC });
+    node_block_1_1_FakeQuantWithMinMaxVars->set_common_node_parameters(
+        NodeParams{"block_1_1_FakeQuantWithMinMaxVars", target});
+    node_block_1_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(
+        data_path, "/cnn_data/edsr_model/block_1_1_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC));
+
+    NodeID id_mul_y = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{1}, DataType::QASYMM8, QuantizationInfo(0.0003921568568330258), DataLayout::NHWC});
     INode *node_mul_y = _graph.node(id_mul_y);
-    node_mul_y->set_common_node_parameters(NodeParams{ "mul_y", target });
-    node_mul_y->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_y.npy", DataLayout::NHWC));
-
-    NodeID id_block_0_1_Conv2D_bias = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256 },
-            DataType::S32,
-            QuantizationInfo(1.315485519626236e-06),
-            DataLayout::NHWC });
+    node_mul_y->set_common_node_parameters(NodeParams{"mul_y", target});
+    node_mul_y->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_y.npy", DataLayout::NHWC));
+
+    NodeID id_block_0_1_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{256}, DataType::S32, QuantizationInfo(1.315485519626236e-06), DataLayout::NHWC});
     INode *node_block_0_1_Conv2D_bias = _graph.node(id_block_0_1_Conv2D_bias);
-    node_block_0_1_Conv2D_bias->set_common_node_parameters(NodeParams{ "block_0_1_Conv2D_bias", target });
-    node_block_0_1_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_0_1_Conv2D_bias.npy", DataLayout::NHWC));
+    node_block_0_1_Conv2D_bias->set_common_node_parameters(NodeParams{"block_0_1_Conv2D_bias", target});
+    node_block_0_1_Conv2D_bias->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/block_0_1_Conv2D_bias.npy", DataLayout::NHWC));
 
     NodeID id_block_0_1_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256, 3, 3, 256 },
-            DataType::QASYMM8,
-            QuantizationInfo(0.00039420535904355347, 129),
-            DataLayout::NHWC });
+        TensorDescriptor{TensorShape{256, 3, 3, 256}, DataType::QASYMM8,
+                         QuantizationInfo(0.00039420535904355347, 129), DataLayout::NHWC});
     INode *node_block_0_1_FakeQuantWithMinMaxVars = _graph.node(id_block_0_1_FakeQuantWithMinMaxVars);
-    node_block_0_1_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "block_0_1_FakeQuantWithMinMaxVars", target });
-    node_block_0_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_0_1_FakeQuantWithMinMaxVars.npy",
-                                                                                         DataLayout::NHWC));
-
-    NodeID id_pre_residual_Conv2D_bias = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256 },
-            DataType::S32,
-            QuantizationInfo(1.7214160834555514e-06),
-            DataLayout::NHWC });
+    node_block_0_1_FakeQuantWithMinMaxVars->set_common_node_parameters(
+        NodeParams{"block_0_1_FakeQuantWithMinMaxVars", target});
+    node_block_0_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(
+        data_path, "/cnn_data/edsr_model/block_0_1_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC));
+
+    NodeID id_pre_residual_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{256}, DataType::S32, QuantizationInfo(1.7214160834555514e-06), DataLayout::NHWC});
     INode *node_pre_residual_Conv2D_bias = _graph.node(id_pre_residual_Conv2D_bias);
-    node_pre_residual_Conv2D_bias->set_common_node_parameters(NodeParams{ "pre_residual_Conv2D_bias", target });
-    node_pre_residual_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/pre_residual_Conv2D_bias.npy", DataLayout::NHWC));
+    node_pre_residual_Conv2D_bias->set_common_node_parameters(NodeParams{"pre_residual_Conv2D_bias", target});
+    node_pre_residual_Conv2D_bias->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/pre_residual_Conv2D_bias.npy", DataLayout::NHWC));
 
     NodeID id_pre_residual_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 3, 3, 3, 256 },
-            DataType::QASYMM8,
-            QuantizationInfo(0.0004389610840007663, 127),
-            DataLayout::NHWC });
+        TensorDescriptor{TensorShape{3, 3, 3, 256}, DataType::QASYMM8, QuantizationInfo(0.0004389610840007663, 127),
+                         DataLayout::NHWC});
     INode *node_pre_residual_FakeQuantWithMinMaxVars = _graph.node(id_pre_residual_FakeQuantWithMinMaxVars);
-    node_pre_residual_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "pre_residual_FakeQuantWithMinMaxVars", target });
-    node_pre_residual_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/pre_residual_FakeQuantWithMinMaxVars.npy",
-                                                                                            DataLayout::NHWC));
+    node_pre_residual_FakeQuantWithMinMaxVars->set_common_node_parameters(
+        NodeParams{"pre_residual_FakeQuantWithMinMaxVars", target});
+    node_pre_residual_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(
+        data_path, "/cnn_data/edsr_model/pre_residual_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC));
 
     TensorShape input_shape{};
     input_shape.set(0, 3, false).set(1, 360, false).set(2, 640, false).set(3, 1, false);
 
     NodeID id_input = _graph.add_node<InputNode>(
-        TensorDescriptor
-        {
-            input_shape,
-            DataType::QASYMM8,
-            QuantizationInfo(0.003921568859368563),
-            DataLayout::NHWC });
+        TensorDescriptor{input_shape, DataType::QASYMM8, QuantizationInfo(0.003921568859368563), DataLayout::NHWC});
     INode *node_input = _graph.node(id_input);
-    node_input->set_common_node_parameters(NodeParams{ "input", target });
+    node_input->set_common_node_parameters(NodeParams{"input", target});
     node_input->output(0)->set_accessor(get_input_accessor(common_params));
 
-    NodeID id_pre_residual_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.0033370566088706255, 96));
+    NodeID id_pre_residual_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.0033370566088706255, 96));
     INode *node_pre_residual_BiasAdd = _graph.node(id_pre_residual_BiasAdd);
-    node_pre_residual_BiasAdd->set_common_node_parameters(NodeParams{ "pre_residual_BiasAdd", target });
+    node_pre_residual_BiasAdd->set_common_node_parameters(NodeParams{"pre_residual_BiasAdd", target});
     _graph.add_connection(id_input, 0, id_pre_residual_BiasAdd, 0);
     _graph.add_connection(id_pre_residual_FakeQuantWithMinMaxVars, 0, id_pre_residual_BiasAdd, 1);
     _graph.add_connection(id_pre_residual_Conv2D_bias, 0, id_pre_residual_BiasAdd, 2);
 
-    NodeID id_block_0_1_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.007344874087721109, 185));
+    NodeID id_block_0_1_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.007344874087721109, 185));
     INode *node_block_0_1_BiasAdd = _graph.node(id_block_0_1_BiasAdd);
-    node_block_0_1_BiasAdd->set_common_node_parameters(NodeParams{ "block_0_1_BiasAdd", target });
+    node_block_0_1_BiasAdd->set_common_node_parameters(NodeParams{"block_0_1_BiasAdd", target});
     _graph.add_connection(id_pre_residual_BiasAdd, 0, id_block_0_1_BiasAdd, 0);
     _graph.add_connection(id_block_0_1_FakeQuantWithMinMaxVars, 0, id_block_0_1_BiasAdd, 1);
     _graph.add_connection(id_block_0_1_Conv2D_bias, 0, id_block_0_1_BiasAdd, 2);
 
     NodeID id_mul = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Mul, QuantizationInfo{ 0.0006341293919831514, 174 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Mul, QuantizationInfo{0.0006341293919831514, 174}});
     INode *node_mul = _graph.node(id_mul);
-    node_mul->set_common_node_parameters(NodeParams{ "mul", target });
+    node_mul->set_common_node_parameters(NodeParams{"mul", target});
     _graph.add_connection(id_block_0_1_BiasAdd, 0, id_mul, 0);
     _graph.add_connection(id_mul_y, 0, id_mul, 1);
 
     NodeID id_add = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add, QuantizationInfo{ 0.0031092411372810602, 95 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add, QuantizationInfo{0.0031092411372810602, 95}});
     INode *node_add = _graph.node(id_add);
-    node_add->set_common_node_parameters(NodeParams{ "add", target });
+    node_add->set_common_node_parameters(NodeParams{"add", target});
     _graph.add_connection(id_pre_residual_BiasAdd, 0, id_add, 0);
     _graph.add_connection(id_mul, 0, id_add, 1);
 
-    NodeID id_block_1_1_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.005333727691322565, 117));
+    NodeID id_block_1_1_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.005333727691322565, 117));
    INode *node_block_1_1_BiasAdd = _graph.node(id_block_1_1_BiasAdd);
-    node_block_1_1_BiasAdd->set_common_node_parameters(NodeParams{ "block_1_1_BiasAdd", target });
+    node_block_1_1_BiasAdd->set_common_node_parameters(NodeParams{"block_1_1_BiasAdd", target});
     _graph.add_connection(id_add, 0, id_block_1_1_BiasAdd, 0);
     _graph.add_connection(id_block_1_1_FakeQuantWithMinMaxVars, 0, id_block_1_1_BiasAdd, 1);
     _graph.add_connection(id_block_1_1_Conv2D_bias, 0, id_block_1_1_BiasAdd, 2);
 
     NodeID id_mul_1 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Mul, QuantizationInfo{ 0.0004965941770933568, 122 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Mul, QuantizationInfo{0.0004965941770933568, 122}});
     INode *node_mul_1 = _graph.node(id_mul_1);
-    node_mul_1->set_common_node_parameters(NodeParams{ "mul_1", target });
+    node_mul_1->set_common_node_parameters(NodeParams{"mul_1", target});
     _graph.add_connection(id_block_1_1_BiasAdd, 0, id_mul_1, 0);
     _graph.add_connection(id_mul_1_y, 0, id_mul_1, 1);
 
     NodeID id_add_1 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add, QuantizationInfo{ 0.0030700892675668, 96 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add, QuantizationInfo{0.0030700892675668, 96}});
     INode *node_add_1 = _graph.node(id_add_1);
-    node_add_1->set_common_node_parameters(NodeParams{ "add_1", target });
+    node_add_1->set_common_node_parameters(NodeParams{"add_1", target});
     _graph.add_connection(id_add, 0, id_add_1, 0);
     _graph.add_connection(id_mul_1, 0, id_add_1, 1);
 
-    NodeID id_block_2_1_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.004199742339551449, 132));
+    NodeID id_block_2_1_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.004199742339551449, 132));
     INode *node_block_2_1_BiasAdd = _graph.node(id_block_2_1_BiasAdd);
-    node_block_2_1_BiasAdd->set_common_node_parameters(NodeParams{ "block_2_1_BiasAdd", target });
+    node_block_2_1_BiasAdd->set_common_node_parameters(NodeParams{"block_2_1_BiasAdd", target});
     _graph.add_connection(id_add_1, 0, id_block_2_1_BiasAdd, 0);
     _graph.add_connection(id_block_2_1_FakeQuantWithMinMaxVars, 0, id_block_2_1_BiasAdd, 1);
     _graph.add_connection(id_block_2_1_Conv2D_bias, 0, id_block_2_1_BiasAdd, 2);
 
     NodeID id_mul_2 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Mul, QuantizationInfo{ 0.0004133903712499887, 130 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Mul, QuantizationInfo{0.0004133903712499887, 130}});
     INode *node_mul_2 = _graph.node(id_mul_2);
-    node_mul_2->set_common_node_parameters(NodeParams{ "mul_2", target });
+    node_mul_2->set_common_node_parameters(NodeParams{"mul_2", target});
     _graph.add_connection(id_block_2_1_BiasAdd, 0, id_mul_2, 0);
     _graph.add_connection(id_mul_2_y, 0, id_mul_2, 1);
 
     NodeID id_add_2 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add, QuantizationInfo{ 0.003026385325938463, 94 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add, QuantizationInfo{0.003026385325938463, 94}});
     INode *node_add_2 = _graph.node(id_add_2);
-    node_add_2->set_common_node_parameters(NodeParams{ "add_2", target });
+    node_add_2->set_common_node_parameters(NodeParams{"add_2", target});
     _graph.add_connection(id_add_1, 0, id_add_2, 0);
     _graph.add_connection(id_mul_2, 0, id_add_2, 1);
 
-    NodeID id_block_3_1_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.003977528307586908, 142));
+    NodeID id_block_3_1_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.003977528307586908, 142));
     INode *node_block_3_1_BiasAdd = _graph.node(id_block_3_1_BiasAdd);
-    node_block_3_1_BiasAdd->set_common_node_parameters(NodeParams{ "block_3_1_BiasAdd", target });
+    node_block_3_1_BiasAdd->set_common_node_parameters(NodeParams{"block_3_1_BiasAdd", target});
     _graph.add_connection(id_add_2, 0, id_block_3_1_BiasAdd, 0);
     _graph.add_connection(id_block_3_1_FakeQuantWithMinMaxVars, 0, id_block_3_1_BiasAdd, 1);
     _graph.add_connection(id_block_3_1_Conv2D_bias, 0, id_block_3_1_BiasAdd, 2);
 
     NodeID id_mul_3 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Mul, QuantizationInfo{ 0.0003943995980080217, 141 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Mul, QuantizationInfo{0.0003943995980080217, 141}});
     INode *node_mul_3 = _graph.node(id_mul_3);
-    node_mul_3->set_common_node_parameters(NodeParams{ "mul_3", target });
+    node_mul_3->set_common_node_parameters(NodeParams{"mul_3", target});
     _graph.add_connection(id_block_3_1_BiasAdd, 0, id_mul_3, 0);
     _graph.add_connection(id_mul_3_y, 0, id_mul_3, 1);
 
     NodeID id_add_3 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add, QuantizationInfo{ 0.003101327223703265, 98 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add, QuantizationInfo{0.003101327223703265, 98}});
     INode *node_add_3 = _graph.node(id_add_3);
-    node_add_3->set_common_node_parameters(NodeParams{ "add_3", target });
+    node_add_3->set_common_node_parameters(NodeParams{"add_3", target});
     _graph.add_connection(id_add_2, 0, id_add_3, 0);
     _graph.add_connection(id_mul_3, 0, id_add_3, 1);
 
-    NodeID id_block_4_1_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.0045388080179691315, 146));
+    NodeID id_block_4_1_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.0045388080179691315, 146));
     INode *node_block_4_1_BiasAdd = _graph.node(id_block_4_1_BiasAdd);
-    node_block_4_1_BiasAdd->set_common_node_parameters(NodeParams{ "block_4_1_BiasAdd", target });
+    node_block_4_1_BiasAdd->set_common_node_parameters(NodeParams{"block_4_1_BiasAdd", target});
     _graph.add_connection(id_add_3, 0, id_block_4_1_BiasAdd, 0);
     _graph.add_connection(id_block_4_1_FakeQuantWithMinMaxVars, 0, id_block_4_1_BiasAdd, 1);
     _graph.add_connection(id_block_4_1_Conv2D_bias, 0, id_block_4_1_BiasAdd, 2);
 
     NodeID id_mul_4 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Mul, QuantizationInfo{ 0.00044342130422592163, 143 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Mul, QuantizationInfo{0.00044342130422592163, 143}});
     INode *node_mul_4 = _graph.node(id_mul_4);
-    node_mul_4->set_common_node_parameters(NodeParams{ "mul_4", target });
+    node_mul_4->set_common_node_parameters(NodeParams{"mul_4", target});
     _graph.add_connection(id_block_4_1_BiasAdd, 0, id_mul_4, 0);
     _graph.add_connection(id_mul_4_y, 0, id_mul_4, 1);
 
     NodeID id_add_4 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add, QuantizationInfo{ 0.003150839824229479, 98 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add, QuantizationInfo{0.003150839824229479, 98}});
     INode *node_add_4 = _graph.node(id_add_4);
-    node_add_4->set_common_node_parameters(NodeParams{ "add_4", target });
+    node_add_4->set_common_node_parameters(NodeParams{"add_4", target});
     _graph.add_connection(id_add_3, 0, id_add_4, 0);
     _graph.add_connection(id_mul_4, 0, id_add_4, 1);
 
-    NodeID id_block_5_1_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.00402890844270587, 132));
+    NodeID id_block_5_1_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.00402890844270587, 132));
     INode *node_block_5_1_BiasAdd = _graph.node(id_block_5_1_BiasAdd);
-    node_block_5_1_BiasAdd->set_common_node_parameters(NodeParams{ "block_5_1_BiasAdd", target });
+    node_block_5_1_BiasAdd->set_common_node_parameters(NodeParams{"block_5_1_BiasAdd", target});
     _graph.add_connection(id_add_4, 0, id_block_5_1_BiasAdd, 0);
     _graph.add_connection(id_block_5_1_FakeQuantWithMinMaxVars, 0, id_block_5_1_BiasAdd, 1);
     _graph.add_connection(id_block_5_1_Conv2D_bias, 0, id_block_5_1_BiasAdd, 2);
 
     NodeID id_mul_5 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Mul, QuantizationInfo{ 0.0004023382789455354, 132 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Mul, QuantizationInfo{0.0004023382789455354, 132}});
     INode *node_mul_5 = _graph.node(id_mul_5);
-    node_mul_5->set_common_node_parameters(NodeParams{ "mul_5", target });
+    node_mul_5->set_common_node_parameters(NodeParams{"mul_5", target});
     _graph.add_connection(id_block_5_1_BiasAdd, 0, id_mul_5, 0);
     _graph.add_connection(id_mul_5_y, 0, id_mul_5, 1);
 
     NodeID id_add_5 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add, QuantizationInfo{ 0.0030975888948887587, 94 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add, QuantizationInfo{0.0030975888948887587, 94}});
     INode *node_add_5 = _graph.node(id_add_5);
-    node_add_5->set_common_node_parameters(NodeParams{ "add_5", target });
+    node_add_5->set_common_node_parameters(NodeParams{"add_5", target});
     _graph.add_connection(id_add_4, 0, id_add_5, 0);
     _graph.add_connection(id_mul_5, 0, id_add_5, 1);
 
-    NodeID id_block_6_1_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.00421866774559021, 125));
+    NodeID id_block_6_1_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.00421866774559021, 125));
     INode *node_block_6_1_BiasAdd = _graph.node(id_block_6_1_BiasAdd);
-    node_block_6_1_BiasAdd->set_common_node_parameters(NodeParams{ "block_6_1_BiasAdd", target });
+    node_block_6_1_BiasAdd->set_common_node_parameters(NodeParams{"block_6_1_BiasAdd", target});
     _graph.add_connection(id_add_5, 0, id_block_6_1_BiasAdd, 0);
     _graph.add_connection(id_block_6_1_FakeQuantWithMinMaxVars, 0, id_block_6_1_BiasAdd, 1);
     _graph.add_connection(id_block_6_1_Conv2D_bias, 0, id_block_6_1_BiasAdd, 2);
 
     NodeID id_mul_6 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Mul, QuantizationInfo{ 0.00041950203012675047, 125 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Mul, QuantizationInfo{0.00041950203012675047, 125}});
     INode *node_mul_6 = _graph.node(id_mul_6);
-    node_mul_6->set_common_node_parameters(NodeParams{ "mul_6", target });
+    node_mul_6->set_common_node_parameters(NodeParams{"mul_6", target});
     _graph.add_connection(id_block_6_1_BiasAdd, 0, id_mul_6, 0);
     _graph.add_connection(id_mul_6_y, 0, id_mul_6, 1);
 
     NodeID id_add_6 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add, QuantizationInfo{ 0.003155382815748453, 92 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add, QuantizationInfo{0.003155382815748453, 92}});
     INode *node_add_6 = _graph.node(id_add_6);
-    node_add_6->set_common_node_parameters(NodeParams{ "add_6", target });
+    node_add_6->set_common_node_parameters(NodeParams{"add_6", target});
     _graph.add_connection(id_add_5, 0, id_add_6, 0);
     _graph.add_connection(id_mul_6, 0, id_add_6, 1);
 
-    NodeID id_block_7_1_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.004250136204063892, 143));
+    NodeID id_block_7_1_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.004250136204063892, 143));
     INode *node_block_7_1_BiasAdd = _graph.node(id_block_7_1_BiasAdd);
-    node_block_7_1_BiasAdd->set_common_node_parameters(NodeParams{ "block_7_1_BiasAdd", target });
+    node_block_7_1_BiasAdd->set_common_node_parameters(NodeParams{"block_7_1_BiasAdd", target});
     _graph.add_connection(id_add_6, 0, id_block_7_1_BiasAdd, 0);
     _graph.add_connection(id_block_7_1_FakeQuantWithMinMaxVars, 0, id_block_7_1_BiasAdd, 1);
     _graph.add_connection(id_block_7_1_Conv2D_bias, 0, id_block_7_1_BiasAdd, 2);
 
     NodeID id_mul_7 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Mul, QuantizationInfo{ 0.00042401350219734013, 142 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Mul, QuantizationInfo{0.00042401350219734013, 142}});
     INode *node_mul_7 = _graph.node(id_mul_7);
-    node_mul_7->set_common_node_parameters(NodeParams{ "mul_7", target });
+    node_mul_7->set_common_node_parameters(NodeParams{"mul_7", target});
     _graph.add_connection(id_block_7_1_BiasAdd, 0, id_mul_7, 0);
     _graph.add_connection(id_mul_7_y, 0, id_mul_7, 1);
 
     NodeID id_add_7 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add, QuantizationInfo{ 0.0031760605052113533, 86 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add, QuantizationInfo{0.0031760605052113533, 86}});
     INode *node_add_7 = _graph.node(id_add_7);
-    node_add_7->set_common_node_parameters(NodeParams{ "add_7", target });
+    node_add_7->set_common_node_parameters(NodeParams{"add_7", target});
     _graph.add_connection(id_add_6, 0, id_add_7, 0);
     _graph.add_connection(id_mul_7, 0, id_add_7, 1);
 
-    NodeID id_block_8_1_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.004277155734598637, 123));
+    NodeID id_block_8_1_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.004277155734598637, 123));
     INode *node_block_8_1_BiasAdd = _graph.node(id_block_8_1_BiasAdd);
-    node_block_8_1_BiasAdd->set_common_node_parameters(NodeParams{ "block_8_1_BiasAdd", target });
+    node_block_8_1_BiasAdd->set_common_node_parameters(NodeParams{"block_8_1_BiasAdd", target});
     _graph.add_connection(id_add_7, 0, id_block_8_1_BiasAdd, 0);
     _graph.add_connection(id_block_8_1_FakeQuantWithMinMaxVars, 0, id_block_8_1_BiasAdd, 1);
     _graph.add_connection(id_block_8_1_Conv2D_bias, 0, id_block_8_1_BiasAdd, 2);
 
     NodeID id_mul_8 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Mul, QuantizationInfo{ 0.00042673019925132394, 123 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Mul, QuantizationInfo{0.00042673019925132394, 123}});
     INode *node_mul_8 = _graph.node(id_mul_8);
-    node_mul_8->set_common_node_parameters(NodeParams{ "mul_8", target });
+    node_mul_8->set_common_node_parameters(NodeParams{"mul_8", target});
     _graph.add_connection(id_block_8_1_BiasAdd, 0, id_mul_8, 0);
     _graph.add_connection(id_mul_8_y, 0, id_mul_8, 1);
 
     NodeID id_add_8 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add, QuantizationInfo{ 0.0032156009692698717, 86 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add, QuantizationInfo{0.0032156009692698717, 86}});
     INode *node_add_8 = _graph.node(id_add_8);
-    node_add_8->set_common_node_parameters(NodeParams{ "add_8", target });
+    node_add_8->set_common_node_parameters(NodeParams{"add_8", target});
     _graph.add_connection(id_add_7, 0, id_add_8, 0);
     _graph.add_connection(id_mul_8, 0, id_add_8, 1);
 
-    NodeID id_block_9_1_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.00445037754252553, 129));
+    NodeID id_block_9_1_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.00445037754252553, 129));
     INode *node_block_9_1_BiasAdd = _graph.node(id_block_9_1_BiasAdd);
-    node_block_9_1_BiasAdd->set_common_node_parameters(NodeParams{ "block_9_1_BiasAdd", target });
+    node_block_9_1_BiasAdd->set_common_node_parameters(NodeParams{"block_9_1_BiasAdd", target});
     _graph.add_connection(id_add_8, 0, id_block_9_1_BiasAdd, 0);
     _graph.add_connection(id_block_9_1_FakeQuantWithMinMaxVars, 0, id_block_9_1_BiasAdd, 1);
     _graph.add_connection(id_block_9_1_Conv2D_bias, 0, id_block_9_1_BiasAdd, 2);
 
     NodeID id_mul_9 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Mul, QuantizationInfo{ 0.0004448975087143481, 129 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Mul, QuantizationInfo{0.0004448975087143481, 129}});
     INode *node_mul_9 = _graph.node(id_mul_9);
-    node_mul_9->set_common_node_parameters(NodeParams{ "mul_9", target });
+    node_mul_9->set_common_node_parameters(NodeParams{"mul_9", target});
     _graph.add_connection(id_block_9_1_BiasAdd, 0, id_mul_9, 0);
     _graph.add_connection(id_mul_9_y, 0, id_mul_9, 1);
 
     NodeID id_add_9 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add, QuantizationInfo{ 0.0032742770854383707, 80 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add, QuantizationInfo{0.0032742770854383707, 80}});
     INode *node_add_9 = _graph.node(id_add_9);
-    node_add_9->set_common_node_parameters(NodeParams{ "add_9", target });
+    node_add_9->set_common_node_parameters(NodeParams{"add_9", target});
     _graph.add_connection(id_add_8, 0, id_add_9, 0);
     _graph.add_connection(id_mul_9, 0, id_add_9, 1);
 
-    NodeID id_block_10_1_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.003614710411056876, 131));
+    NodeID id_block_10_1_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.003614710411056876, 131));
     INode *node_block_10_1_BiasAdd = _graph.node(id_block_10_1_BiasAdd);
-    node_block_10_1_BiasAdd->set_common_node_parameters(NodeParams{ "block_10_1_BiasAdd", target });
+    node_block_10_1_BiasAdd->set_common_node_parameters(NodeParams{"block_10_1_BiasAdd", target});
     _graph.add_connection(id_add_9, 0, id_block_10_1_BiasAdd, 0);
     _graph.add_connection(id_block_10_1_FakeQuantWithMinMaxVars, 0, id_block_10_1_BiasAdd, 1);
     _graph.add_connection(id_block_10_1_Conv2D_bias, 0, id_block_10_1_BiasAdd, 2);
 
     NodeID id_mul_10 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Mul, QuantizationInfo{ 0.00036083892337046564, 130 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Mul, QuantizationInfo{0.00036083892337046564, 130}});
     INode *node_mul_10 = _graph.node(id_mul_10);
-    node_mul_10->set_common_node_parameters(NodeParams{ "mul_10", target });
+    node_mul_10->set_common_node_parameters(NodeParams{"mul_10", target});
     _graph.add_connection(id_block_10_1_BiasAdd, 0, id_mul_10, 0);
     _graph.add_connection(id_mul_10_y, 0, id_mul_10, 1);
 
     NodeID id_add_10 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add, QuantizationInfo{ 0.0031881770119071007, 81 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add, QuantizationInfo{0.0031881770119071007, 81}});
     INode *node_add_10 = _graph.node(id_add_10);
-    node_add_10->set_common_node_parameters(NodeParams{ "add_10", target });
+    node_add_10->set_common_node_parameters(NodeParams{"add_10", target});
     _graph.add_connection(id_add_9, 0, id_add_10, 0);
     _graph.add_connection(id_mul_10, 0, id_add_10, 1);
 
-    NodeID id_block_11_1_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.003969002980738878, 133));
+    NodeID id_block_11_1_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.003969002980738878, 133));
     INode *node_block_11_1_BiasAdd = _graph.node(id_block_11_1_BiasAdd);
-    node_block_11_1_BiasAdd->set_common_node_parameters(NodeParams{ "block_11_1_BiasAdd", target });
+    node_block_11_1_BiasAdd->set_common_node_parameters(NodeParams{"block_11_1_BiasAdd", target});
     _graph.add_connection(id_add_10, 0, id_block_11_1_BiasAdd, 0);
     _graph.add_connection(id_block_11_1_FakeQuantWithMinMaxVars, 0, id_block_11_1_BiasAdd, 1);
     _graph.add_connection(id_block_11_1_Conv2D_bias, 0, id_block_11_1_BiasAdd, 2);
 
     NodeID id_mul_11 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Mul, QuantizationInfo{ 0.0003968806122429669, 133 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Mul, QuantizationInfo{0.0003968806122429669, 133}});
     INode *node_mul_11 = _graph.node(id_mul_11);
-    node_mul_11->set_common_node_parameters(NodeParams{ "mul_11", target });
+    node_mul_11->set_common_node_parameters(NodeParams{"mul_11", target});
     _graph.add_connection(id_block_11_1_BiasAdd, 0, id_mul_11, 0);
     _graph.add_connection(id_mul_11_y, 0, id_mul_11, 1);
 
     NodeID id_add_11 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add, QuantizationInfo{ 0.0032707711216062307, 80 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add, QuantizationInfo{0.0032707711216062307, 80}});
     INode *node_add_11 = _graph.node(id_add_11);
-    node_add_11->set_common_node_parameters(NodeParams{ "add_11", target });
+    node_add_11->set_common_node_parameters(NodeParams{"add_11", target});
     _graph.add_connection(id_add_10, 0, id_add_11, 0);
     _graph.add_connection(id_mul_11, 0, id_add_11, 1);
 
-    NodeID id_block_12_1_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.004366801120340824, 110));
+    NodeID id_block_12_1_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.004366801120340824, 110));
     INode *node_block_12_1_BiasAdd = _graph.node(id_block_12_1_BiasAdd);
-    node_block_12_1_BiasAdd->set_common_node_parameters(NodeParams{ "block_12_1_BiasAdd", target });
+    node_block_12_1_BiasAdd->set_common_node_parameters(NodeParams{"block_12_1_BiasAdd", target});
     _graph.add_connection(id_add_11, 0, id_block_12_1_BiasAdd, 0);
     _graph.add_connection(id_block_12_1_FakeQuantWithMinMaxVars, 0, id_block_12_1_BiasAdd, 1);
     _graph.add_connection(id_block_12_1_Conv2D_bias, 0, id_block_12_1_BiasAdd, 2);
 
     NodeID id_mul_12 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Mul, QuantizationInfo{ 0.0004365936329122633, 110 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Mul, QuantizationInfo{0.0004365936329122633, 110}});
     INode *node_mul_12 = _graph.node(id_mul_12);
-    node_mul_12->set_common_node_parameters(NodeParams{ "mul_12", target });
+    node_mul_12->set_common_node_parameters(NodeParams{"mul_12", target});
     _graph.add_connection(id_block_12_1_BiasAdd, 0, id_mul_12, 0);
     _graph.add_connection(id_mul_12_y, 0, id_mul_12, 1);
 
     NodeID id_add_12 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add, QuantizationInfo{ 0.003275055903941393, 79 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add, QuantizationInfo{0.003275055903941393, 79}});
     INode *node_add_12 = _graph.node(id_add_12);
-    node_add_12->set_common_node_parameters(NodeParams{ "add_12", target });
+    node_add_12->set_common_node_parameters(NodeParams{"add_12", target});
     _graph.add_connection(id_add_11, 0, id_add_12, 0);
     _graph.add_connection(id_mul_12, 0, id_add_12, 1);
 
-    NodeID id_block_13_1_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.004386766813695431, 139));
+    NodeID id_block_13_1_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.004386766813695431, 139));
     INode *node_block_13_1_BiasAdd = _graph.node(id_block_13_1_BiasAdd);
-    node_block_13_1_BiasAdd->set_common_node_parameters(NodeParams{ "block_13_1_BiasAdd", target });
+    node_block_13_1_BiasAdd->set_common_node_parameters(NodeParams{"block_13_1_BiasAdd", target});
     _graph.add_connection(id_add_12, 0, id_block_13_1_BiasAdd, 0);
     _graph.add_connection(id_block_13_1_FakeQuantWithMinMaxVars, 0, id_block_13_1_BiasAdd, 1);
     _graph.add_connection(id_block_13_1_Conv2D_bias, 0, id_block_13_1_BiasAdd, 2);
 
     NodeID id_mul_13 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Mul, QuantizationInfo{ 0.0004385628562886268, 139 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Mul, QuantizationInfo{0.0004385628562886268, 139}});
     INode *node_mul_13 = _graph.node(id_mul_13);
-    node_mul_13->set_common_node_parameters(NodeParams{ "mul_13", target });
+    node_mul_13->set_common_node_parameters(NodeParams{"mul_13", target});
     _graph.add_connection(id_block_13_1_BiasAdd, 0, id_mul_13, 0);
     _graph.add_connection(id_mul_13_y, 0, id_mul_13, 1);
 
     NodeID id_add_13 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add, QuantizationInfo{ 0.0033287261612713337, 78 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add, QuantizationInfo{0.0033287261612713337, 78}});
     INode *node_add_13 = _graph.node(id_add_13);
-    node_add_13->set_common_node_parameters(NodeParams{ "add_13", target });
+    node_add_13->set_common_node_parameters(NodeParams{"add_13", target});
     _graph.add_connection(id_add_12, 0, id_add_13, 0);
     _graph.add_connection(id_mul_13, 0, id_add_13, 1);
 
-    NodeID id_block_14_1_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.0038069337606430054, 130));
+    NodeID id_block_14_1_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.0038069337606430054, 130));
     INode *node_block_14_1_BiasAdd = _graph.node(id_block_14_1_BiasAdd);
-    node_block_14_1_BiasAdd->set_common_node_parameters(NodeParams{ "block_14_1_BiasAdd", target });
+    node_block_14_1_BiasAdd->set_common_node_parameters(NodeParams{"block_14_1_BiasAdd", target});
     _graph.add_connection(id_add_13, 0, id_block_14_1_BiasAdd, 0);
     _graph.add_connection(id_block_14_1_FakeQuantWithMinMaxVars, 0, id_block_14_1_BiasAdd, 1);
     _graph.add_connection(id_block_14_1_Conv2D_bias, 0, id_block_14_1_BiasAdd, 2);
 
     NodeID id_mul_14 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Mul, QuantizationInfo{ 0.00037829321809113026, 130 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Mul, QuantizationInfo{0.00037829321809113026, 130}});
     INode *node_mul_14 = _graph.node(id_mul_14);
-    node_mul_14->set_common_node_parameters(NodeParams{ "mul_14", target });
+    node_mul_14->set_common_node_parameters(NodeParams{"mul_14", target});
     _graph.add_connection(id_block_14_1_BiasAdd, 0, id_mul_14, 0);
     _graph.add_connection(id_mul_14_y, 0, id_mul_14, 1);
 
     NodeID id_add_14 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add, QuantizationInfo{ 0.0033590947277843952, 77 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add, QuantizationInfo{0.0033590947277843952, 77}});
     INode *node_add_14 = _graph.node(id_add_14);
-    node_add_14->set_common_node_parameters(NodeParams{ "add_14", target });
+    node_add_14->set_common_node_parameters(NodeParams{"add_14", target});
     _graph.add_connection(id_add_13, 0, id_add_14, 0);
     _graph.add_connection(id_mul_14, 0, id_add_14, 1);
 
-    NodeID id_block_15_1_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.004009159281849861, 130));
+    NodeID id_block_15_1_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.004009159281849861, 130));
     INode *node_block_15_1_BiasAdd = _graph.node(id_block_15_1_BiasAdd);
-    node_block_15_1_BiasAdd->set_common_node_parameters(NodeParams{ "block_15_1_BiasAdd", target });
+    node_block_15_1_BiasAdd->set_common_node_parameters(NodeParams{"block_15_1_BiasAdd", target});
     _graph.add_connection(id_add_14, 0, id_block_15_1_BiasAdd, 0);
     _graph.add_connection(id_block_15_1_FakeQuantWithMinMaxVars, 0, id_block_15_1_BiasAdd, 1);
     _graph.add_connection(id_block_15_1_Conv2D_bias, 0, id_block_15_1_BiasAdd, 2);
 
     NodeID id_mul_15 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Mul, QuantizationInfo{ 0.0004008286341559142, 130 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Mul, QuantizationInfo{0.0004008286341559142, 130}});
     INode *node_mul_15 = _graph.node(id_mul_15);
-    node_mul_15->set_common_node_parameters(NodeParams{ "mul_15", target });
+    node_mul_15->set_common_node_parameters(NodeParams{"mul_15", target});
     _graph.add_connection(id_block_15_1_BiasAdd, 0, id_mul_15, 0);
     _graph.add_connection(id_mul_15_y, 0, id_mul_15, 1);
 
     NodeID id_add_15 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add, QuantizationInfo{ 0.0035031239967793226, 78 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add, QuantizationInfo{0.0035031239967793226, 78}});
     INode *node_add_15 = _graph.node(id_add_15);
-    node_add_15->set_common_node_parameters(NodeParams{ "add_15", target });
+    node_add_15->set_common_node_parameters(NodeParams{"add_15", target});
     _graph.add_connection(id_add_14, 0, id_add_15, 0);
     _graph.add_connection(id_mul_15, 0, id_add_15, 1);
 
-    NodeID id_post_residual_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.005167999770492315, 112));
+    NodeID id_post_residual_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.005167999770492315, 112));
     INode *node_post_residual_BiasAdd = _graph.node(id_post_residual_BiasAdd);
-    node_post_residual_BiasAdd->set_common_node_parameters(NodeParams{ "post_residual_BiasAdd", target });
+    node_post_residual_BiasAdd->set_common_node_parameters(NodeParams{"post_residual_BiasAdd", target});
     _graph.add_connection(id_add_15, 0, id_post_residual_BiasAdd, 0);
     _graph.add_connection(id_post_residual_FakeQuantWithMinMaxVars, 0, id_post_residual_BiasAdd, 1);
     _graph.add_connection(id_post_residual_Conv2D_bias, 0, id_post_residual_BiasAdd, 2);
 
     NodeID id_add_16 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add, QuantizationInfo{ 0.0065071373246610165, 89 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add, QuantizationInfo{0.0065071373246610165, 89}});
     INode *node_add_16 = _graph.node(id_add_16);
-    node_add_16->set_common_node_parameters(NodeParams{ "add_16", target });
+    node_add_16->set_common_node_parameters(NodeParams{"add_16", target});
     _graph.add_connection(id_post_residual_BiasAdd, 0, id_add_16, 0);
     _graph.add_connection(id_pre_residual_BiasAdd, 0, id_add_16, 1);
 
-    NodeID id_pre_upscale_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.005013593938201666, 26));
+    NodeID id_pre_upscale_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.005013593938201666, 26));
     INode *node_pre_upscale_BiasAdd = _graph.node(id_pre_upscale_BiasAdd);
-    node_pre_upscale_BiasAdd->set_common_node_parameters(NodeParams{ "pre_upscale_BiasAdd", target });
+    node_pre_upscale_BiasAdd->set_common_node_parameters(NodeParams{"pre_upscale_BiasAdd", target});
     _graph.add_connection(id_add_16, 0, id_pre_upscale_BiasAdd, 0);
     _graph.add_connection(id_pre_upscale_FakeQuantWithMinMaxVars, 0, id_pre_upscale_BiasAdd, 1);
     _graph.add_connection(id_pre_upscale_Conv2D_bias, 0, id_pre_upscale_BiasAdd, 2);
 
     NodeID id_upscale_net_FakeQuantWithMinMaxVars_1 = _graph.add_node<DeconvolutionLayerNode>(
-        descriptors::DeconvolutionLayerDescriptor
-        {
-            PadStrideInfo{
-                2, 2,
-                0, 0,
-                0, 0,
-                DimensionRoundingType::FLOOR },
-            QuantizationInfo{ 0.004990961868315935, 26 } });
+        descriptors::DeconvolutionLayerDescriptor{PadStrideInfo{2, 2, 0, 0, 0, 0, DimensionRoundingType::FLOOR},
+                                                  QuantizationInfo{0.004990961868315935, 26}});
     INode *node_upscale_net_FakeQuantWithMinMaxVars_1 = _graph.node(id_upscale_net_FakeQuantWithMinMaxVars_1);
-    node_upscale_net_FakeQuantWithMinMaxVars_1->set_common_node_parameters(NodeParams{ "upscale_net_FakeQuantWithMinMaxVars_1", target });
+    node_upscale_net_FakeQuantWithMinMaxVars_1->set_common_node_parameters(
+        NodeParams{"upscale_net_FakeQuantWithMinMaxVars_1", target});
     _graph.add_connection(id_pre_upscale_BiasAdd, 0, id_upscale_net_FakeQuantWithMinMaxVars_1, 0);
-    _graph.add_connection(id_upscale_net_FakeQuantWithMinMaxVars_transposed, 0, id_upscale_net_FakeQuantWithMinMaxVars_1, 1);
+    _graph.add_connection(id_upscale_net_FakeQuantWithMinMaxVars_transposed, 0,
+                          id_upscale_net_FakeQuantWithMinMaxVars_1, 1);
 
     TensorShape output_shape;
     output_shape.set(0, 3, false).set(1, 720, false).set(2, 1280, false).set(3, 1, false);
 
     NodeID id_output_140211982446376 = _graph.add_node<OutputNode>();
     INode *node_output_140211982446376 = _graph.node(id_output_140211982446376);
-    node_output_140211982446376->set_common_node_parameters(NodeParams{ "output_140211982446376", target });
+    node_output_140211982446376->set_common_node_parameters(NodeParams{"output_140211982446376", target});
     _graph.add_connection(id_upscale_net_FakeQuantWithMinMaxVars_1, 0, id_output_140211982446376, 0);
-    node_output_140211982446376->input(0)->set_accessor(get_npy_output_accessor(expected_output_filename.value(), output_shape, common_params.data_type,
-                                                                                common_params.data_layout));
+    node_output_140211982446376->input(0)->set_accessor(get_npy_output_accessor(
+        expected_output_filename.value(), output_shape, common_params.data_type, common_params.data_layout));
 
     return true;
 }
diff --git a/examples/graph_googlenet.cpp b/examples/graph_googlenet.cpp
index 84a10ffce1..f431fc412b 100644
--- a/examples/graph_googlenet.cpp
+++ b/examples/graph_googlenet.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 ARM Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -22,6 +22,7 @@
  * SOFTWARE.
  */
 #include "arm_compute/graph.h"
+
 #include "support/ToolchainSupport.h"
 #include "utils/CommonGraphOptions.h"
 #include "utils/GraphUtils.h"
@@ -35,8 +36,7 @@ using namespace arm_compute::graph_utils;
 class GraphGooglenetExample : public Example
 {
 public:
-    GraphGooglenetExample()
-        : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "GoogleNet")
+    GraphGooglenetExample() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "GoogleNet")
     {
     }
     bool do_setup(int argc, char **argv) override
@@ -49,14 +49,15 @@ public:
         common_params = consume_common_graph_parameters(common_opts);
 
         // Return when help menu is requested
-        if(common_params.help)
+        if (common_params.help)
         {
             cmd_parser.print_help(argv[0]);
             return false;
         }
 
         // Checks
-        ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "QASYMM8 not supported for this graph");
+        ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type),
+                                "QASYMM8 not supported for this graph");
 
         // Print parameter values
         std::cout << common_params << std::endl;
@@ -65,64 +66,99 @@ public:
         std::string data_path = common_params.data_path;
 
         // Create a preprocessor object
-        const std::array<float, 3> mean_rgb{ { 122.68f, 116.67f, 104.01f } };
-        std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb);
+        const std::array<float, 3>     mean_rgb{{122.68f, 116.67f, 104.01f}};
+        std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<CaffePreproccessor>(mean_rgb);
 
         // Create input descriptor
         const auto        operation_layout = common_params.data_layout;
-        const TensorShape tensor_shape     = permute_shape(TensorShape(224U, 224U, 3U, 1U), DataLayout::NCHW, operation_layout);
-        TensorDescriptor  input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout);
+        const TensorShape tensor_shape =
+            permute_shape(TensorShape(224U, 224U, 3U, common_params.batches), DataLayout::NCHW, operation_layout);
+        TensorDescriptor input_descriptor =
+            TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout);
 
         // Set weights trained layout
         const DataLayout weights_layout = DataLayout::NCHW;
 
-        graph << common_params.target
-              << common_params.fast_math_hint
+        graph << common_params.target << common_params.fast_math_hint
               << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor)))
+              << ConvolutionLayer(7U, 7U, 64U,
+                                  get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv1/conv1_7x7_s2_w.npy",
+                                                       weights_layout),
+                                  get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv1/conv1_7x7_s2_b.npy"),
+                                  PadStrideInfo(2, 2, 3, 3))
+                     .set_name("conv1/7x7_s2")
+              << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+                     .set_name("conv1/relu_7x7")
+              << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout,
+                                               PadStrideInfo(2, 2, 0, 0,
DimensionRoundingType::CEIL))) + .set_name("pool1/3x3_s2") + << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)) + .set_name("pool1/norm1") << ConvolutionLayer( - 7U, 7U, 64U, - get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv1/conv1_7x7_s2_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv1/conv1_7x7_s2_b.npy"), - PadStrideInfo(2, 2, 3, 3)) - .set_name("conv1/7x7_s2") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1/relu_7x7") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("pool1/3x3_s2") - << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)).set_name("pool1/norm1") - << ConvolutionLayer( - 1U, 1U, 64U, - get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_reduce_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_reduce_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv2/3x3_reduce") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv2/relu_3x3_reduce") + 1U, 1U, 64U, + get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_reduce_w.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_reduce_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("conv2/3x3_reduce") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv2/relu_3x3_reduce") << ConvolutionLayer( - 3U, 3U, 192U, - get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv2/3x3") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv2/relu_3x3") - << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)).set_name("conv2/norm2") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("pool2/3x3_s2"); - graph << get_inception_node(data_path, "inception_3a", weights_layout, 64, std::make_tuple(96U, 128U), std::make_tuple(16U, 32U), 32U).set_name("inception_3a/concat"); - graph << get_inception_node(data_path, "inception_3b", weights_layout, 128, std::make_tuple(128U, 192U), std::make_tuple(32U, 96U), 64U).set_name("inception_3b/concat"); - graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("pool3/3x3_s2"); - graph << get_inception_node(data_path, "inception_4a", weights_layout, 192, std::make_tuple(96U, 208U), std::make_tuple(16U, 48U), 64U).set_name("inception_4a/concat"); - graph << get_inception_node(data_path, "inception_4b", weights_layout, 160, std::make_tuple(112U, 224U), std::make_tuple(24U, 64U), 64U).set_name("inception_4b/concat"); - graph << get_inception_node(data_path, "inception_4c", weights_layout, 128, std::make_tuple(128U, 256U), std::make_tuple(24U, 64U), 64U).set_name("inception_4c/concat"); - graph << get_inception_node(data_path, "inception_4d", weights_layout, 112, std::make_tuple(144U, 288U), std::make_tuple(32U, 64U), 64U).set_name("inception_4d/concat"); - graph << 
get_inception_node(data_path, "inception_4e", weights_layout, 256, std::make_tuple(160U, 320U), std::make_tuple(32U, 128U), 128U).set_name("inception_4e/concat"); - graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("pool4/3x3_s2"); - graph << get_inception_node(data_path, "inception_5a", weights_layout, 256, std::make_tuple(160U, 320U), std::make_tuple(32U, 128U), 128U).set_name("inception_5a/concat"); - graph << get_inception_node(data_path, "inception_5b", weights_layout, 384, std::make_tuple(192U, 384U), std::make_tuple(48U, 128U), 128U).set_name("inception_5b/concat"); - graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 7, operation_layout, PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL))).set_name("pool5/7x7_s1") + 3U, 3U, 192U, + get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_b.npy"), + PadStrideInfo(1, 1, 1, 1)) + .set_name("conv2/3x3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv2/relu_3x3") + << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)) + .set_name("conv2/norm2") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + .set_name("pool2/3x3_s2"); + graph << get_inception_node(data_path, "inception_3a", weights_layout, 64, std::make_tuple(96U, 128U), + std::make_tuple(16U, 32U), 32U) + .set_name("inception_3a/concat"); + graph << get_inception_node(data_path, "inception_3b", weights_layout, 128, std::make_tuple(128U, 192U), + std::make_tuple(32U, 96U), 64U) + .set_name("inception_3b/concat"); + graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + .set_name("pool3/3x3_s2"); + graph << get_inception_node(data_path, "inception_4a", weights_layout, 192, std::make_tuple(96U, 208U), + std::make_tuple(16U, 48U), 64U) + .set_name("inception_4a/concat"); + graph << get_inception_node(data_path, "inception_4b", weights_layout, 160, std::make_tuple(112U, 224U), + std::make_tuple(24U, 64U), 64U) + .set_name("inception_4b/concat"); + graph << get_inception_node(data_path, "inception_4c", weights_layout, 128, std::make_tuple(128U, 256U), + std::make_tuple(24U, 64U), 64U) + .set_name("inception_4c/concat"); + graph << get_inception_node(data_path, "inception_4d", weights_layout, 112, std::make_tuple(144U, 288U), + std::make_tuple(32U, 64U), 64U) + .set_name("inception_4d/concat"); + graph << get_inception_node(data_path, "inception_4e", weights_layout, 256, std::make_tuple(160U, 320U), + std::make_tuple(32U, 128U), 128U) + .set_name("inception_4e/concat"); + graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + .set_name("pool4/3x3_s2"); + graph << get_inception_node(data_path, "inception_5a", weights_layout, 256, std::make_tuple(160U, 320U), + std::make_tuple(32U, 128U), 128U) + .set_name("inception_5a/concat"); + graph << get_inception_node(data_path, "inception_5b", weights_layout, 384, std::make_tuple(192U, 384U), + std::make_tuple(48U, 128U), 128U) + .set_name("inception_5b/concat"); + graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 7, operation_layout, + PadStrideInfo(1, 1, 0, 0, 
DimensionRoundingType::CEIL))) + .set_name("pool5/7x7_s1") << FullyConnectedLayer( - 1000U, - get_weights_accessor(data_path, "/cnn_data/googlenet_model/loss3/loss3_classifier_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/googlenet_model/loss3/loss3_classifier_b.npy")) - .set_name("loss3/classifier") - << SoftmaxLayer().set_name("prob") - << OutputLayer(get_output_accessor(common_params, 5)); + 1000U, + get_weights_accessor(data_path, "/cnn_data/googlenet_model/loss3/loss3_classifier_w.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/googlenet_model/loss3/loss3_classifier_b.npy")) + .set_name("loss3/classifier") + << SoftmaxLayer().set_name("prob") << OutputLayer(get_output_accessor(common_params, 5)); // Finalize graph GraphConfig config; @@ -130,6 +166,7 @@ public: config.use_tuner = common_params.enable_tuner; config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; graph.finalize(common_params.target, config); @@ -147,63 +184,63 @@ private: CommonGraphParams common_params; Stream graph; - ConcatLayer get_inception_node(const std::string &data_path, std::string &&param_path, DataLayout weights_layout, - unsigned int a_filt, + ConcatLayer get_inception_node(const std::string &data_path, + std::string &&param_path, + DataLayout weights_layout, + unsigned int a_filt, std::tuple<unsigned int, unsigned int> b_filters, std::tuple<unsigned int, unsigned int> c_filters, - unsigned int d_filt) + unsigned int d_filt) { std::string total_path = "/cnn_data/googlenet_model/" + param_path + "/" + param_path + "_"; SubStream i_a(graph); - i_a << ConvolutionLayer( - 1U, 1U, a_filt, - get_weights_accessor(data_path, total_path + "1x1_w.npy", weights_layout), - get_weights_accessor(data_path, total_path + "1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/relu_1x1"); + i_a << ConvolutionLayer(1U, 1U, a_filt, + get_weights_accessor(data_path, total_path + "1x1_w.npy", weights_layout), + get_weights_accessor(data_path, total_path + "1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/relu_1x1"); SubStream i_b(graph); - i_b << ConvolutionLayer( - 1U, 1U, std::get<0>(b_filters), - get_weights_accessor(data_path, total_path + "3x3_reduce_w.npy", weights_layout), - get_weights_accessor(data_path, total_path + "3x3_reduce_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/3x3_reduce") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/relu_3x3_reduce") - << ConvolutionLayer( - 3U, 3U, std::get<1>(b_filters), - get_weights_accessor(data_path, total_path + "3x3_w.npy", weights_layout), - get_weights_accessor(data_path, total_path + "3x3_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name(param_path + "/3x3") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/relu_3x3"); + i_b << ConvolutionLayer(1U, 1U, std::get<0>(b_filters), + get_weights_accessor(data_path, total_path + "3x3_reduce_w.npy", weights_layout), + get_weights_accessor(data_path, total_path + "3x3_reduce_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/3x3_reduce") + <<
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/relu_3x3_reduce") + << ConvolutionLayer(3U, 3U, std::get<1>(b_filters), + get_weights_accessor(data_path, total_path + "3x3_w.npy", weights_layout), + get_weights_accessor(data_path, total_path + "3x3_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name(param_path + "/3x3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/relu_3x3"); SubStream i_c(graph); - i_c << ConvolutionLayer( - 1U, 1U, std::get<0>(c_filters), - get_weights_accessor(data_path, total_path + "5x5_reduce_w.npy", weights_layout), - get_weights_accessor(data_path, total_path + "5x5_reduce_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/5x5_reduce") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/relu_5x5_reduce") - << ConvolutionLayer( - 5U, 5U, std::get<1>(c_filters), - get_weights_accessor(data_path, total_path + "5x5_w.npy", weights_layout), - get_weights_accessor(data_path, total_path + "5x5_b.npy"), - PadStrideInfo(1, 1, 2, 2)) - .set_name(param_path + "/5x5") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/relu_5x5"); + i_c << ConvolutionLayer(1U, 1U, std::get<0>(c_filters), + get_weights_accessor(data_path, total_path + "5x5_reduce_w.npy", weights_layout), + get_weights_accessor(data_path, total_path + "5x5_reduce_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/5x5_reduce") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/relu_5x5_reduce") + << ConvolutionLayer(5U, 5U, std::get<1>(c_filters), + get_weights_accessor(data_path, total_path + "5x5_w.npy", weights_layout), + get_weights_accessor(data_path, total_path + "5x5_b.npy"), PadStrideInfo(1, 1, 2, 2)) + .set_name(param_path + "/5x5") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/relu_5x5"); SubStream i_d(graph); - i_d << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL))).set_name(param_path + "/pool") + i_d << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, + PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL))) + .set_name(param_path + "/pool") << ConvolutionLayer( - 1U, 1U, d_filt, - get_weights_accessor(data_path, total_path + "pool_proj_w.npy", weights_layout), - get_weights_accessor(data_path, total_path + "pool_proj_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/pool_proj") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/relu_pool_proj"); + 1U, 1U, d_filt, get_weights_accessor(data_path, total_path + "pool_proj_w.npy", weights_layout), + get_weights_accessor(data_path, total_path + "pool_proj_b.npy"), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/pool_proj") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/relu_pool_proj"); return ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)); } diff --git a/examples/graph_inception_resnet_v1.cpp b/examples/graph_inception_resnet_v1.cpp index ea9bf8f5a9..a54a0f7806 100644 --- a/examples/graph_inception_resnet_v1.cpp +++ b/examples/graph_inception_resnet_v1.cpp 
@@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. */ #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -38,7 +39,12 @@ class InceptionResNetV1Example final : public Example { public: InceptionResNetV1Example() - : cmd_parser(), common_opts(cmd_parser), common_params(), model_input_width(nullptr), model_input_height(nullptr), graph(0, "InceptionResNetV1") + : cmd_parser(), + common_opts(cmd_parser), + common_params(), + model_input_width(nullptr), + model_input_height(nullptr), + graph(0, "InceptionResNetV1") { model_input_width = cmd_parser.add_option<SimpleOption<unsigned int>>("image-width", 512); model_input_height = cmd_parser.add_option<SimpleOption<unsigned int>>("image-height", 512); @@ -47,7 +53,7 @@ public: model_input_width->set_help("Input image width."); model_input_height->set_help("Input image height."); } - InceptionResNetV1Example(const InceptionResNetV1Example &) = delete; + InceptionResNetV1Example(const InceptionResNetV1Example &) = delete; InceptionResNetV1Example &operator=(const InceptionResNetV1Example &) = delete; ~InceptionResNetV1Example() override = default; bool do_setup(int argc, char **argv) override @@ -60,7 +66,7 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; @@ -70,13 +76,14 @@ public: const unsigned int image_height = model_input_height->value(); // Set default layout if needed - if(!common_opts.data_layout->is_set() && common_params.target == Target::NEON) + if (!common_opts.data_layout->is_set() && common_params.target == Target::NEON) { common_params.data_layout = DataLayout::NCHW; } // Checks - ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "QASYMM8 not supported for this graph"); + ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), + "QASYMM8 not supported for this graph"); // Print parameter values std::cout << common_params << std::endl; @@ -86,105 +93,108 @@ public: // Create model path std::string data_path = common_params.data_path; std::string model_path = "/cnn_data/inception_resnet_v1_model/"; - if(!data_path.empty()) + if (!data_path.empty()) { data_path += model_path; } // Create a preprocessor object - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<TFPreproccessor>(0.f, 1.f); + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<TFPreproccessor>(0.f, 1.f); // Create input descriptor const auto operation_layout = common_params.data_layout; - const TensorShape tensor_shape = permute_shape(TensorShape(image_width, image_height, 3U, 1U), DataLayout::NCHW, operation_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); + const TensorShape tensor_shape = permute_shape( + TensorShape(image_width, image_height, 3U, common_params.batches), DataLayout::NCHW, operation_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NCHW; - graph << common_params.target - << common_params.fast_math_hint + graph << 
common_params.target << common_params.fast_math_hint << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), false)) // Conv2d_1a_3x3 - << ConvolutionLayer(3U, 3U, 32U, - get_weights_accessor(data_path, "Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 0)) - .set_name("Conv2d_1a_3x3/convolution") + << ConvolutionLayer( + 3U, 3U, 32U, get_weights_accessor(data_path, "Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Conv2d_1a_3x3/convolution") << BatchNormalizationLayer(get_weights_accessor(data_path, "Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, "Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "Conv2d_1a_3x3_BatchNorm_beta.npy"), batch_norm_epsilon) - .set_name("Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_1a_3x3/Relu") + .set_name("Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_1a_3x3/Relu") // Conv2d_2a_3x3 - << ConvolutionLayer(3U, 3U, 32U, - get_weights_accessor(data_path, "Conv2d_2a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Conv2d_2a_3x3/convolution") + << ConvolutionLayer( + 3U, 3U, 32U, get_weights_accessor(data_path, "Conv2d_2a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Conv2d_2a_3x3/convolution") << BatchNormalizationLayer(get_weights_accessor(data_path, "Conv2d_2a_3x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, "Conv2d_2a_3x3_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "Conv2d_2a_3x3_BatchNorm_beta.npy"), batch_norm_epsilon) - .set_name("Conv2d_2a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_2a_3x3/Relu") + .set_name("Conv2d_2a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_2a_3x3/Relu") // Conv2d_2b_3x3 - << ConvolutionLayer(3U, 3U, 64U, - get_weights_accessor(data_path, "Conv2d_2b_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("Conv2d_2b_3x3/convolution") + << ConvolutionLayer( + 3U, 3U, 64U, get_weights_accessor(data_path, "Conv2d_2b_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("Conv2d_2b_3x3/convolution") << BatchNormalizationLayer(get_weights_accessor(data_path, "Conv2d_2b_3x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, "Conv2d_2b_3x3_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "Conv2d_2b_3x3_BatchNorm_beta.npy"), batch_norm_epsilon) - .set_name("Conv2d_2b_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_2b_3x3/Relu") + .set_name("Conv2d_2b_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + 
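+ // Note for the MaxPool_3a_3x3 pooling below: the trailing 'true' passed to
+ // PoolingLayerInfo is its exclude_padding flag (descriptive note only).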
.set_name("Conv2d_2b_3x3/Relu") // MaxPool_3a_3x3 - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true)).set_name("MaxPool_3a_3x3/MaxPool") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true)) + .set_name("MaxPool_3a_3x3/MaxPool") // Conv2d_3b_1x1 - << ConvolutionLayer(1U, 1U, 80U, - get_weights_accessor(data_path, "Conv2d_3b_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Conv2d_3b_1x1/convolution") + << ConvolutionLayer( + 1U, 1U, 80U, get_weights_accessor(data_path, "Conv2d_3b_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Conv2d_3b_1x1/convolution") << BatchNormalizationLayer(get_weights_accessor(data_path, "Conv2d_3b_1x1_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, "Conv2d_3b_1x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "Conv2d_3b_1x1_BatchNorm_beta.npy"), batch_norm_epsilon) - .set_name("Conv2d_3b_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_3b_1x1/Relu") + .set_name("Conv2d_3b_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_3b_1x1/Relu") // Conv2d_4a_3x3 - << ConvolutionLayer(3U, 3U, 192U, - get_weights_accessor(data_path, "Conv2d_4a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Conv2d_4a_3x3/convolution") + << ConvolutionLayer( + 3U, 3U, 192U, get_weights_accessor(data_path, "Conv2d_4a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Conv2d_4a_3x3/convolution") << BatchNormalizationLayer(get_weights_accessor(data_path, "Conv2d_4a_3x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, "Conv2d_4a_3x3_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "Conv2d_4a_3x3_BatchNorm_beta.npy"), batch_norm_epsilon) - .set_name("Conv2d_4a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_4a_3x3/Relu") + .set_name("Conv2d_4a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_4a_3x3/Relu") // Conv2d_4b_3x3 - << ConvolutionLayer(3U, 3U, 256U, - get_weights_accessor(data_path, "Conv2d_4b_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 0)) - .set_name("Conv2d_4a_3x3/convolution") + << ConvolutionLayer( + 3U, 3U, 256U, get_weights_accessor(data_path, "Conv2d_4b_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Conv2d_4a_3x3/convolution") << BatchNormalizationLayer(get_weights_accessor(data_path, "Conv2d_4b_3x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, "Conv2d_4b_3x3_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "Conv2d_4b_3x3_BatchNorm_beta.npy"), batch_norm_epsilon) - .set_name("Conv2d_4b_3x3/BatchNorm") - << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_4b_3x3/Relu"); + .set_name("Conv2d_4b_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_4b_3x3/Relu"); // 5 x Inception-resnet-A block35_repeat(data_path, weights_layout, 5); @@ -202,12 +212,10 @@ public: // Logits tail graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, operation_layout)).set_name("Logits/AvgPool_1a_8x8") << FlattenLayer().set_name("Logits/Flatten") - << FullyConnectedLayer( - 128U, - get_weights_accessor(data_path, "Logits_Logits_weights.npy", weights_layout), - get_weights_accessor(data_path, "Logits_Logits_biases.npy")) - .set_name("Logits/Logits") - << OutputLayer(arm_compute::support::cpp14::make_unique<DummyAccessor>(0)); + << FullyConnectedLayer(128U, get_weights_accessor(data_path, "Logits_Logits_weights.npy", weights_layout), + get_weights_accessor(data_path, "Logits_Logits_biases.npy")) + .set_name("Logits/Logits") + << OutputLayer(std::make_unique<DummyAccessor>(0)); // Finalize graph GraphConfig config; @@ -215,6 +223,7 @@ public: config.use_tuner = common_params.enable_tuner; config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; graph.finalize(common_params.target, config); @@ -230,14 +239,14 @@ private: CommandLineParser cmd_parser; CommonGraphOptions common_opts; CommonGraphParams common_params; - SimpleOption<unsigned int> *model_input_width{ nullptr }; - SimpleOption<unsigned int> *model_input_height{ nullptr }; + SimpleOption<unsigned int> *model_input_width{nullptr}; + SimpleOption<unsigned int> *model_input_height{nullptr}; Stream graph; private: void block35_repeat(const std::string &data_path, DataLayout weights_layout, unsigned int num_blocks) { - for(unsigned int i = 0; i < num_blocks; ++i) + for (unsigned int i = 0; i < num_blocks; ++i) { std::stringstream unit_path_ss; unit_path_ss << "Repeat_block35_" << (i + 1) << "_"; @@ -253,102 +262,128 @@ private: // Branch 0 SubStream i_la(i_l); - i_la << ConvolutionLayer(1U, 1U, 32U, - get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Branch_0/Conv2d_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name(unit_name + "Branch_0/Conv2d_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_0/Conv2d_1x1/Relu"); + i_la << ConvolutionLayer( + 1U, 1U, 32U, + get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "Branch_0/Conv2d_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + 
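+ // BatchNormalizationLayer accessor order: moving mean, moving variance,
+ // gamma, beta, epsilon. get_random_accessor(1.f, 1.f) supplies a constant
+ // 1.0 gamma, presumably because the checkpoint stores none (descriptive note).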
get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name(unit_name + "Branch_0/Conv2d_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_0/Conv2d_1x1/Relu"); // Branch 1 SubStream i_lb(i_l); i_lb << ConvolutionLayer(1U, 1U, 32U, - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_1/Conv2d_0a_1x1/Relu") + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/Relu") << ConvolutionLayer(3U, 3U, 32U, - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_3x3_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_3x3_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) - .set_name(unit_name + "Branch_1/Conv2d_0b_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name(unit_name + "Branch_1/Conv2d_0b_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_1/Conv2d_0b_3x3/Relu"); + .set_name(unit_name + "Branch_1/Conv2d_0b_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name(unit_name + "Branch_1/Conv2d_0b_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + 
"Branch_1/Conv2d_0b_3x3/Relu"); // Branch 2 SubStream i_lc(i_l); i_lc << ConvolutionLayer(1U, 1U, 32U, - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0a_1x1_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Branch_2/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name(unit_name + "Branch_2/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_2/Conv2d_0a_1x1/Relu") + .set_name(unit_name + "Branch_2/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name(unit_name + "Branch_2/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_2/Conv2d_0a_1x1/Relu") << ConvolutionLayer(3U, 3U, 32U, - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0b_3x3_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) - .set_name(unit_name + "Branch_2/Conv2d_0b_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name(unit_name + "Branch_2/Conv2d_0b_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_2/Conv2d_0b_3x3/Relu") + .set_name(unit_name + "Branch_2/Conv2d_0b_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name(unit_name + "Branch_2/Conv2d_0b_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_2/Conv2d_0b_3x3/Relu") << ConvolutionLayer(3U, 3U, 32U, - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0c_3x3_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0c_3x3_weights.npy", + weights_layout), 
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) - .set_name(unit_name + "Branch_2/Conv2d_0c_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name(unit_name + "Branch_2/Conv2d_0c_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_2/Conv2d_0c_3x3/Relu"); + .set_name(unit_name + "Branch_2/Conv2d_0c_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name(unit_name + "Branch_2/Conv2d_0c_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_2/Conv2d_0c_3x3/Relu"); // Concatenate i_l << ConcatLayer(std::move(i_la), std::move(i_lb), std::move(i_lc)).set_name(unit_name + "concat") - << ConvolutionLayer(1U, 1U, 256U, - get_weights_accessor(data_path, unit_path + "Conv2d_1x1_weights.npy", weights_layout), - get_weights_accessor(data_path, unit_path + "Conv2d_1x1_biases.npy", weights_layout), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Conv2d_1x1/convolution") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 0.17f, 0.f)).set_name(unit_name + "mul"); + << ConvolutionLayer( + 1U, 1U, 256U, + get_weights_accessor(data_path, unit_path + "Conv2d_1x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Conv2d_1x1_biases.npy", weights_layout), + PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "Conv2d_1x1/convolution") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 0.17f, 0.f)) + .set_name(unit_name + "mul"); graph << EltwiseLayer(std::move(i_l), std::move(i_r), EltwiseOperation::Add).set_name(unit_name + "add") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Relu"); + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Relu"); } } void block17_repeat(const std::string &data_path, DataLayout weights_layout, unsigned int num_blocks) { - for(unsigned int i = 0; i < num_blocks; ++i) + for (unsigned int i = 0; i < num_blocks; ++i) { std::stringstream unit_path_ss; unit_path_ss << "Repeat_1_block17_" << (i + 1) << "_"; @@ -364,79 +399,101 @@ private: // Branch 0 SubStream i_la(i_l); - i_la << ConvolutionLayer(1U, 1U, 128U, - get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Branch_0/Conv2d_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + 
"Branch_0_Conv2d_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name(unit_name + "Branch_0/Conv2d_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_0/Conv2d_1x1/Relu"); + i_la << ConvolutionLayer( + 1U, 1U, 128U, + get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "Branch_0/Conv2d_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name(unit_name + "Branch_0/Conv2d_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_0/Conv2d_1x1/Relu"); // Branch 1 SubStream i_lb(i_l); i_lb << ConvolutionLayer(1U, 1U, 128U, - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_1/Conv2d_0a_1x1/Relu") + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/Relu") << ConvolutionLayer(7U, 1U, 128U, - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x7_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0)) - .set_name(unit_name + "Branch_1/Conv2d_0b_1x7/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"), - 
get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name(unit_name + "Branch_1/Conv2d_0b_1x7/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_1/Conv2d_0b_1x7/Relu") + .set_name(unit_name + "Branch_1/Conv2d_0b_1x7/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name(unit_name + "Branch_1/Conv2d_0b_1x7/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_1/Conv2d_0b_1x7/Relu") << ConvolutionLayer(1U, 7U, 128U, - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_7x1_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3)) - .set_name(unit_name + "Branch_1/Conv2d_0c_7x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name(unit_name + "Branch_1/Conv2d_0c_7x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_1/Conv2d_0c_7x1/Relu"); + .set_name(unit_name + "Branch_1/Conv2d_0c_7x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name(unit_name + "Branch_1/Conv2d_0c_7x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_1/Conv2d_0c_7x1/Relu"); // Concatenate i_l << ConcatLayer(std::move(i_la), std::move(i_lb)).set_name(unit_name + "concat") - << ConvolutionLayer(1U, 1U, 896U, - get_weights_accessor(data_path, unit_path + "Conv2d_1x1_weights.npy", weights_layout), - get_weights_accessor(data_path, unit_path + "Conv2d_1x1_biases.npy", weights_layout), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Conv2d_1x1/convolution") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 0.10f, 0.f)).set_name(unit_name + "mul"); + << ConvolutionLayer( + 1U, 1U, 896U, + get_weights_accessor(data_path, unit_path + "Conv2d_1x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Conv2d_1x1_biases.npy", weights_layout), + PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "Conv2d_1x1/convolution") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 0.10f, 0.f)) + .set_name(unit_name + 
"mul"); graph << EltwiseLayer(std::move(i_l), std::move(i_r), EltwiseOperation::Add).set_name(unit_name + "add") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Relu"); + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Relu"); } } - void block8_repeat(const std::string &data_path, DataLayout weights_layout, unsigned int num_blocks, float scale, bool has_activation) + void block8_repeat(const std::string &data_path, + DataLayout weights_layout, + unsigned int num_blocks, + float scale, + bool has_activation) { - for(unsigned int i = 0; i < num_blocks; ++i) + for (unsigned int i = 0; i < num_blocks; ++i) { std::stringstream unit_path_ss; std::stringstream unit_name_ss; - if(num_blocks != 1) + if (num_blocks != 1) { unit_path_ss << "Repeat_2_block8_" << (i + 1) << "_"; unit_name_ss << "Repeat_2/block8_" << (i + 1) << "/"; @@ -456,79 +513,97 @@ private: // Branch 0 SubStream i_la(i_l); - i_la << ConvolutionLayer(1U, 1U, 192U, - get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Branch_0/Conv2d_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name(unit_name + "Branch_0/Conv2d_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_0/Conv2d_1x1/Relu"); + i_la << ConvolutionLayer( + 1U, 1U, 192U, + get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "Branch_0/Conv2d_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name(unit_name + "Branch_0/Conv2d_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_0/Conv2d_1x1/Relu"); // Branch 1 SubStream i_lb(i_l); i_lb << ConvolutionLayer(1U, 1U, 192U, - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - batch_norm_epsilon) - 
.set_name(unit_name + "Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_1/Conv2d_0a_1x1/Relu") + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/Relu") << ConvolutionLayer(3U, 1U, 192U, - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x3_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x3_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 0)) - .set_name(unit_name + "Branch_1/Conv2d_0b_1x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name(unit_name + "Branch_1/Conv2d_0b_1x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_1/Conv2d_0b_1x3/Relu") + .set_name(unit_name + "Branch_1/Conv2d_0b_1x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name(unit_name + "Branch_1/Conv2d_0b_1x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_1/Conv2d_0b_1x3/Relu") << ConvolutionLayer(1U, 3U, 192U, - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_3x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_3x1_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 1)) - .set_name(unit_name + "Branch_1/Conv2d_0c_3x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name(unit_name + "Branch_1/Conv2d_0c_3x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_1/Conv2d_0c_3x1/Relu"); + .set_name(unit_name + "Branch_1/Conv2d_0c_3x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, 
unit_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name(unit_name + "Branch_1/Conv2d_0c_3x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_1/Conv2d_0c_3x1/Relu"); // Concatenate i_l << ConcatLayer(std::move(i_la), std::move(i_lb)).set_name(unit_name + "concat") - << ConvolutionLayer(1U, 1U, 1792U, - get_weights_accessor(data_path, unit_path + "Conv2d_1x1_weights.npy", weights_layout), - get_weights_accessor(data_path, unit_path + "Conv2d_1x1_biases.npy", weights_layout), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Conv2d_1x1/convolution"); + << ConvolutionLayer( + 1U, 1U, 1792U, + get_weights_accessor(data_path, unit_path + "Conv2d_1x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Conv2d_1x1_biases.npy", weights_layout), + PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "Conv2d_1x1/convolution"); // Scale result - if(scale != 1.f) + if (scale != 1.f) { - i_l << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, scale, 0.f)).set_name(unit_name + "mul"); + i_l << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, scale, 0.f)) + .set_name(unit_name + "mul"); } // Residual add graph << EltwiseLayer(std::move(i_l), std::move(i_r), EltwiseOperation::Add).set_name(unit_name + "add"); // Apply activation if needed - if(has_activation) + if (has_activation) { - graph << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Relu"); + graph << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Relu"); } } } @@ -537,61 +612,71 @@ private: { // Branch 0 SubStream i_a(graph); - i_a << ConvolutionLayer(3U, 3U, 384U, - get_weights_accessor(data_path, "Mixed_6a_Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 0)) - .set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_6a_Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_6a_Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_6a_Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/Relu"); + i_a << ConvolutionLayer( + 3U, 3U, 384U, + get_weights_accessor(data_path, "Mixed_6a_Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_6a_Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_6a_Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, 
"Mixed_6a_Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/Relu"); // Branch 1 SubStream i_b(graph); - i_b << ConvolutionLayer(1U, 1U, 192U, - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(3U, 3U, 192U, - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0b_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0b_3x3_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/Relu") - << ConvolutionLayer(3U, 3U, 256U, - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 0)) - .set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/Relu"); + i_b << ConvolutionLayer( + 1U, 1U, 192U, + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), + batch_norm_epsilon) + 
.set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 3U, 3U, 192U, + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0b_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0b_3x3_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/Relu") + << ConvolutionLayer( + 3U, 3U, 256U, + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/Relu"); // Branch 2 SubStream i_c(graph); - i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, PadStrideInfo(2, 2, 0, 0), true)).set_name("Mixed_6a/Branch_2/MaxPool_1a_3x3"); + i_c << PoolingLayer( + PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, PadStrideInfo(2, 2, 0, 0), true)) + .set_name("Mixed_6a/Branch_2/MaxPool_1a_3x3"); // Concatenate graph << ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c)).set_name("Mixed_6a/concat"); @@ -601,103 +686,120 @@ private: { // Branch 0 SubStream i_a(graph); - i_a << ConvolutionLayer(1U, 1U, 256U, - get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_7a/Branch_0/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name("Mixed_7a/Branch_0/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_0/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(3U, 3U, 384U, - get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 0)) - 
.set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/Relu"); + i_a << ConvolutionLayer( + 1U, 1U, 256U, + get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Mixed_7a/Branch_0/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name("Mixed_7a/Branch_0/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_0/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 3U, 3U, 384U, + get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/Relu"); // Branch 1 SubStream i_b(graph); - i_b << ConvolutionLayer(1U, 1U, 256U, - get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(3U, 3U, 256U, - get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 0)) - .set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/convolution") - << 
BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/Relu"); + i_b << ConvolutionLayer( + 1U, 1U, 256U, + get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 3U, 3U, 256U, + get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/Relu"); // Branch 2 SubStream i_c(graph); - i_c << ConvolutionLayer(1U, 1U, 256U, - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_7a/Branch_2/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name("Mixed_7a/Branch_2/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_2/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(3U, 3U, 256U, - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("Mixed_7a/Branch_2/Conv2d_0b_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, 
"Mixed_7a_Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name("Mixed_7a/Branch_2/Conv2d_0b_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_2/Conv2d_0b_3x3/Relu") - << ConvolutionLayer(3U, 3U, 256U, - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 0)) - .set_name("Mixed_7a/Branch_2/Conv2d_1a_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_1a_3x3_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name("Mixed_7a/Branch_2/Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_2/Conv2d_1a_3x3/Relu"); + i_c << ConvolutionLayer( + 1U, 1U, 256U, + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Mixed_7a/Branch_2/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name("Mixed_7a/Branch_2/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_2/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 3U, 3U, 256U, + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("Mixed_7a/Branch_2/Conv2d_0b_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name("Mixed_7a/Branch_2/Conv2d_0b_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_2/Conv2d_0b_3x3/Relu") + << ConvolutionLayer( + 3U, 3U, 256U, + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Mixed_7a/Branch_2/Conv2d_1a_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, 
"Mixed_7a_Branch_2_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_1a_3x3_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name("Mixed_7a/Branch_2/Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_2/Conv2d_1a_3x3/Relu"); // Branch 3 SubStream i_d(graph); - i_d << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, PadStrideInfo(2, 2, 0, 0), true)).set_name("Mixed_7a/Branch_3/MaxPool_1a_3x3"); + i_d << PoolingLayer( + PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, PadStrideInfo(2, 2, 0, 0), true)) + .set_name("Mixed_7a/Branch_3/MaxPool_1a_3x3"); // Concatenate - graph << ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)).set_name("Mixed_7a/concat"); + graph + << ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)).set_name("Mixed_7a/concat"); } }; diff --git a/examples/graph_inception_resnet_v2.cpp b/examples/graph_inception_resnet_v2.cpp index d2f6e1d576..43e31ee14b 100644 --- a/examples/graph_inception_resnet_v2.cpp +++ b/examples/graph_inception_resnet_v2.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. */ #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -35,8 +36,7 @@ using namespace arm_compute::graph_utils; class InceptionResNetV2Example final : public Example { public: - InceptionResNetV2Example() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "InceptionResNetV2") + InceptionResNetV2Example() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "InceptionResNetV2") { } bool do_setup(int argc, char **argv) override @@ -49,20 +49,21 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; } // Set default layout if needed - if(!common_opts.data_layout->is_set() && common_params.target == Target::NEON) + if (!common_opts.data_layout->is_set() && common_params.target == Target::NEON) { common_params.data_layout = DataLayout::NCHW; } // Checks - ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "QASYMM8 not supported for this graph"); + ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), + "QASYMM8 not supported for this graph"); // Print parameter values std::cout << common_params << std::endl; @@ -70,94 +71,98 @@ public: // Create model path std::string data_path = common_params.data_path; std::string model_path = "/cnn_data/inception_resnet_v2_model/"; - if(!data_path.empty()) + if (!data_path.empty()) { data_path += model_path; } // Create a preprocessor object - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<TFPreproccessor>(0.f, 1.f); + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<TFPreproccessor>(0.f, 1.f); // Create input descriptor const auto operation_layout = common_params.data_layout; - const TensorShape tensor_shape = permute_shape(TensorShape(299U, 299U, 3U, 1U), DataLayout::NCHW, operation_layout); - TensorDescriptor input_descriptor = 
TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(299U, 299U, 3U, common_params.batches), DataLayout::NCHW, operation_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NCHW; - graph << common_params.target - << common_params.fast_math_hint + graph << common_params.target << common_params.fast_math_hint << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), false)) // Conv2d_1a_3x3 - << ConvolutionLayer(3U, 3U, 32U, - get_weights_accessor(data_path, "Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 0)) - .set_name("Conv2d_1a_3x3/convolution") + << ConvolutionLayer( + 3U, 3U, 32U, get_weights_accessor(data_path, "Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Conv2d_1a_3x3/convolution") << BatchNormalizationLayer(get_weights_accessor(data_path, "Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, "Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "Conv2d_1a_3x3_BatchNorm_beta.npy"), 0.0010000000474974513f) - .set_name("Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_1a_3x3/Relu") + .set_name("Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_1a_3x3/Relu") // Conv2d_2a_3x3 - << ConvolutionLayer(3U, 3U, 32U, - get_weights_accessor(data_path, "Conv2d_2a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Conv2d_2a_3x3/convolution") + << ConvolutionLayer( + 3U, 3U, 32U, get_weights_accessor(data_path, "Conv2d_2a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Conv2d_2a_3x3/convolution") << BatchNormalizationLayer(get_weights_accessor(data_path, "Conv2d_2a_3x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, "Conv2d_2a_3x3_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "Conv2d_2a_3x3_BatchNorm_beta.npy"), 0.0010000000474974513f) - .set_name("Conv2d_2a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_2a_3x3/Relu") + .set_name("Conv2d_2a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_2a_3x3/Relu") // Conv2d_2b_3x3 - << ConvolutionLayer(3U, 3U, 64U, - get_weights_accessor(data_path, "Conv2d_2b_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("Conv2d_2b_3x3/convolution") + << ConvolutionLayer( + 3U, 3U, 64U, get_weights_accessor(data_path, "Conv2d_2b_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("Conv2d_2b_3x3/convolution") << BatchNormalizationLayer(get_weights_accessor(data_path, 
"Conv2d_2b_3x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, "Conv2d_2b_3x3_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "Conv2d_2b_3x3_BatchNorm_beta.npy"), 0.0010000000474974513f) - .set_name("Conv2d_2b_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_2b_3x3/Relu") + .set_name("Conv2d_2b_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_2b_3x3/Relu") // MaxPool_3a_3x3 - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true)).set_name("MaxPool_3a_3x3/MaxPool") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true)) + .set_name("MaxPool_3a_3x3/MaxPool") // Conv2d_3b_1x1 - << ConvolutionLayer(1U, 1U, 80U, - get_weights_accessor(data_path, "Conv2d_3b_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Conv2d_3b_1x1/convolution") + << ConvolutionLayer( + 1U, 1U, 80U, get_weights_accessor(data_path, "Conv2d_3b_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Conv2d_3b_1x1/convolution") << BatchNormalizationLayer(get_weights_accessor(data_path, "Conv2d_3b_1x1_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, "Conv2d_3b_1x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "Conv2d_3b_1x1_BatchNorm_beta.npy"), 0.0010000000474974513f) - .set_name("Conv2d_3b_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_3b_1x1/Relu") + .set_name("Conv2d_3b_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_3b_1x1/Relu") // Conv2d_4a_3x3 - << ConvolutionLayer(3U, 3U, 192U, - get_weights_accessor(data_path, "Conv2d_4a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Conv2d_4a_3x3/convolution") + << ConvolutionLayer( + 3U, 3U, 192U, get_weights_accessor(data_path, "Conv2d_4a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Conv2d_4a_3x3/convolution") << BatchNormalizationLayer(get_weights_accessor(data_path, "Conv2d_4a_3x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, "Conv2d_4a_3x3_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "Conv2d_4a_3x3_BatchNorm_beta.npy"), 0.0010000000474974513f) - .set_name("Conv2d_4a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_4a_3x3/Relu") + .set_name("Conv2d_4a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_4a_3x3/Relu") // MaxPool_5a_3x3 - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0), true)).set_name("MaxPool_5a_3x3/MaxPool"); + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0), true)) + .set_name("MaxPool_5a_3x3/MaxPool"); 
block_mixed_5b(data_path, weights_layout); block35_repeat(data_path, weights_layout, 10); @@ -168,27 +173,25 @@ public: block8_repeat(data_path, weights_layout, 1, 1.f, false); // Conv2d_7b_1x1 - graph << ConvolutionLayer(1U, 1U, 1536U, - get_weights_accessor(data_path, "Conv2d_7b_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Conv2d_7b_1x1/convolution") + graph << ConvolutionLayer( + 1U, 1U, 1536U, get_weights_accessor(data_path, "Conv2d_7b_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Conv2d_7b_1x1/convolution") << BatchNormalizationLayer(get_weights_accessor(data_path, "Conv2d_7b_1x1_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, "Conv2d_7b_1x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "Conv2d_7b_1x1_BatchNorm_beta.npy"), 0.0010000000474974513f) - .set_name("Conv2d_7b_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_7b_1x1/Relu") + .set_name("Conv2d_7b_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_7b_1x1/Relu") << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, operation_layout)).set_name("Logits/AvgPool_1a_8x8") << FlattenLayer().set_name("Logits/Flatten") - << FullyConnectedLayer( - 1001U, - get_weights_accessor(data_path, "Logits_Logits_weights.npy", weights_layout), - get_weights_accessor(data_path, "Logits_Logits_biases.npy")) - .set_name("Logits/Logits") - << SoftmaxLayer().set_name("Logits/Predictions") - << OutputLayer(get_output_accessor(common_params, 5)); + << FullyConnectedLayer(1001U, + get_weights_accessor(data_path, "Logits_Logits_weights.npy", weights_layout), + get_weights_accessor(data_path, "Logits_Logits_biases.npy")) + .set_name("Logits/Logits") + << SoftmaxLayer().set_name("Logits/Predictions") << OutputLayer(get_output_accessor(common_params, 5)); // Finalize graph GraphConfig config; @@ -196,6 +199,7 @@ public: config.use_tuner = common_params.enable_tuner; config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; graph.finalize(common_params.target, config); @@ -218,164 +222,191 @@ private: { // Branch 0 SubStream i_a(graph); - i_a << ConvolutionLayer(1U, 1U, 96U, - get_weights_accessor(data_path, "Mixed_5b_Branch_0_Conv2d_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_5b/Branch_0/Conv2d_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_5b_Branch_0_Conv2d_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_5b_Branch_0_Conv2d_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_5b_Branch_0_Conv2d_1x1_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name("Mixed_5b/Branch_0/Conv2d_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_5b/Branch_0/Conv2d_1x1/Relu"); + i_a << ConvolutionLayer( + 1U, 1U, 96U, + get_weights_accessor(data_path, "Mixed_5b_Branch_0_Conv2d_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + 
.set_name("Mixed_5b/Branch_0/Conv2d_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_5b_Branch_0_Conv2d_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_5b_Branch_0_Conv2d_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_5b_Branch_0_Conv2d_1x1_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_5b/Branch_0/Conv2d_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_5b/Branch_0/Conv2d_1x1/Relu"); // Branch 1 SubStream i_b(graph); - i_b << ConvolutionLayer(1U, 1U, 48U, - get_weights_accessor(data_path, "Mixed_5b_Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_5b/Branch_1/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_5b_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_5b_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_5b_Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name("Mixed_5b/Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_5b/Branch_1/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(5U, 5U, 64U, - get_weights_accessor(data_path, "Mixed_5b_Branch_1_Conv2d_0b_5x5_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 2, 2)) - .set_name("Mixed_5b/Branch_1/Conv2d_0b_5x5/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_5b_Branch_1_Conv2d_0b_5x5_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_5b_Branch_1_Conv2d_0b_5x5_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_5b_Branch_1_Conv2d_0b_5x5_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name("Mixed_5b/Branch_1/Conv2d_0b_5x5/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_5b/Branch_1/Conv2d_0b_5x5/Relu"); + i_b << ConvolutionLayer( + 1U, 1U, 48U, + get_weights_accessor(data_path, "Mixed_5b_Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Mixed_5b/Branch_1/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_5b_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_5b_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_5b_Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_5b/Branch_1/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_5b/Branch_1/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 5U, 5U, 64U, + get_weights_accessor(data_path, "Mixed_5b_Branch_1_Conv2d_0b_5x5_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 2, 2)) + .set_name("Mixed_5b/Branch_1/Conv2d_0b_5x5/convolution") + << BatchNormalizationLayer( + 
get_weights_accessor(data_path, "Mixed_5b_Branch_1_Conv2d_0b_5x5_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_5b_Branch_1_Conv2d_0b_5x5_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_5b_Branch_1_Conv2d_0b_5x5_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_5b/Branch_1/Conv2d_0b_5x5/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_5b/Branch_1/Conv2d_0b_5x5/Relu"); // Branch 2 SubStream i_c(graph); - i_c << ConvolutionLayer(1U, 1U, 64U, - get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_5b/Branch_2/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name("Mixed_5b/Branch_2/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_5b/Branch_2/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(3U, 3U, 96U, - get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("Mixed_5b/Branch_2/Conv2d_0b_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name("Mixed_5b/Branch_2/Conv2d_0b_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_5b/Branch_2/Conv2d_0b_3x3/Relu") - << ConvolutionLayer(3U, 3U, 96U, - get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0c_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("Mixed_5b/Branch_2/Conv2d_0c_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0c_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0c_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0c_3x3_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name("Mixed_5b/Branch_2/Conv2d_0c_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_5b/Branch_2/Conv2d_0c_3x3/Relu"); + i_c << ConvolutionLayer( + 1U, 1U, 64U, + get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Mixed_5b/Branch_2/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + 
get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_5b/Branch_2/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_5b/Branch_2/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 3U, 3U, 96U, + get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("Mixed_5b/Branch_2/Conv2d_0b_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_5b/Branch_2/Conv2d_0b_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_5b/Branch_2/Conv2d_0b_3x3/Relu") + << ConvolutionLayer( + 3U, 3U, 96U, + get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0c_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("Mixed_5b/Branch_2/Conv2d_0c_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0c_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0c_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0c_3x3_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_5b/Branch_2/Conv2d_0c_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_5b/Branch_2/Conv2d_0c_3x3/Relu"); // Branch 3 SubStream i_d(graph); - i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, common_params.data_layout, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)).set_name("Mixed_5b/Branch_3/AvgPool_0a_3x3") - << ConvolutionLayer(1U, 1U, 64U, - get_weights_accessor(data_path, "Mixed_5b_Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_5b/Branch_3/Conv2d_0b_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_5b_Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_5b_Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_5b_Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name("Mixed_5b/Branch_3/Conv2d_0b_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_5b/Branch_3/Conv2d_0b_1x1/Relu"); + i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, common_params.data_layout, + PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)) + .set_name("Mixed_5b/Branch_3/AvgPool_0a_3x3") + << ConvolutionLayer( + 1U, 1U, 64U, + get_weights_accessor(data_path, 
"Mixed_5b_Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Mixed_5b/Branch_3/Conv2d_0b_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_5b_Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_5b_Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_5b_Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_5b/Branch_3/Conv2d_0b_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_5b/Branch_3/Conv2d_0b_1x1/Relu"); // Concatenate - graph << ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)).set_name("Mixed_5a/concat"); + graph + << ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)).set_name("Mixed_5a/concat"); } void block_mixed_6a(const std::string &data_path, DataLayout weights_layout) { // Branch 0 SubStream i_a(graph); - i_a << ConvolutionLayer(3U, 3U, 384U, - get_weights_accessor(data_path, "Mixed_6a_Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 0)) - .set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_6a_Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_6a_Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_6a_Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/Relu"); + i_a << ConvolutionLayer( + 3U, 3U, 384U, + get_weights_accessor(data_path, "Mixed_6a_Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_6a_Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_6a_Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_6a_Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/Relu"); // Branch 1 SubStream i_b(graph); - i_b << ConvolutionLayer(1U, 1U, 256U, - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 
0.0010000000474974513f) - .set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(3U, 3U, 256U, - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0b_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0b_3x3_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/Relu") - << ConvolutionLayer(3U, 3U, 384U, - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 0)) - .set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/Relu"); + i_b << ConvolutionLayer( + 1U, 1U, 256U, + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 3U, 3U, 256U, + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0b_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0b_3x3_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/BatchNorm") + << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/Relu") + << ConvolutionLayer( + 3U, 3U, 384U, + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/Relu"); // Branch 2 SubStream i_c(graph); - i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, PadStrideInfo(2, 2, 0, 0), true)).set_name("Mixed_6a/Branch_2/MaxPool_1a_3x3"); + i_c << PoolingLayer( + PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, PadStrideInfo(2, 2, 0, 0), true)) + .set_name("Mixed_6a/Branch_2/MaxPool_1a_3x3"); // Concatenate graph << ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c)).set_name("Mixed_6a/concat"); @@ -385,108 +416,125 @@ private: { // Branch 0 SubStream i_a(graph); - i_a << ConvolutionLayer(1U, 1U, 256U, - get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_7a/Branch_0/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name("Mixed_7a/Branch_0/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_0/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(3U, 3U, 384U, - get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 0)) - .set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/Relu"); + i_a << ConvolutionLayer( + 1U, 1U, 256U, + get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + 
.set_name("Mixed_7a/Branch_0/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_7a/Branch_0/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_0/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 3U, 3U, 384U, + get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/Relu"); // Branch 1 SubStream i_b(graph); - i_b << ConvolutionLayer(1U, 1U, 256U, - get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(3U, 3U, 288U, - get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 0)) - .set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/Relu"); + i_b << ConvolutionLayer( + 1U, 1U, 256U, + get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/convolution") + << 
BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 3U, 3U, 288U, + get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/Relu"); // Branch 2 SubStream i_c(graph); - i_c << ConvolutionLayer(1U, 1U, 256U, - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_7a/Branch_2/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name("Mixed_7a/Branch_2/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_2/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(3U, 3U, 288U, - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("Mixed_7a/Branch_2/Conv2d_0b_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name("Mixed_7a/Branch_2/Conv2d_0b_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_2/Conv2d_0b_3x3/Relu") - << ConvolutionLayer(3U, 3U, 320U, - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 0)) - .set_name("Mixed_7a/Branch_2/Conv2d_1a_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, 
"Mixed_7a_Branch_2_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name("Mixed_7a/Branch_2/Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_2/Conv2d_1a_3x3/Relu"); + i_c << ConvolutionLayer( + 1U, 1U, 256U, + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Mixed_7a/Branch_2/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_7a/Branch_2/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_2/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 3U, 3U, 288U, + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("Mixed_7a/Branch_2/Conv2d_0b_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_7a/Branch_2/Conv2d_0b_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_2/Conv2d_0b_3x3/Relu") + << ConvolutionLayer( + 3U, 3U, 320U, + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Mixed_7a/Branch_2/Conv2d_1a_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_1a_3x3_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_7a/Branch_2/Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_2/Conv2d_1a_3x3/Relu"); // Branch 3 SubStream i_d(graph); - i_d << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true)).set_name("Mixed_7a/Branch_3/MaxPool_1a_3x3"); + i_d << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true)) + .set_name("Mixed_7a/Branch_3/MaxPool_1a_3x3"); // Concatenate - graph << 
ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)).set_name("Mixed_7a/concat"); + graph + << ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)).set_name("Mixed_7a/concat"); } void block35_repeat(const std::string &data_path, DataLayout weights_layout, unsigned int num_blocks) { - for(unsigned int i = 0; i < num_blocks; ++i) + for (unsigned int i = 0; i < num_blocks; ++i) { std::stringstream unit_path_ss; unit_path_ss << "Repeat_block35_" << (i + 1) << "_"; @@ -502,102 +550,128 @@ private: // Branch 0 SubStream i_la(i_l); - i_la << ConvolutionLayer(1U, 1U, 32U, - get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Branch_0/Conv2d_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name(unit_name + "Branch_0/Conv2d_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_0/Conv2d_1x1/Relu"); + i_la << ConvolutionLayer( + 1U, 1U, 32U, + get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "Branch_0/Conv2d_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name(unit_name + "Branch_0/Conv2d_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_0/Conv2d_1x1/Relu"); // Branch 1 SubStream i_lb(i_l); i_lb << ConvolutionLayer(1U, 1U, 32U, - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_1/Conv2d_0a_1x1/Relu") + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + 
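The reduction and residual blocks in this file all use the same fork/join idiom: constructing a SubStream from the main Stream forks the graph at its current tail, each branch is grown independently, and a ConcatLayer joins the branch outputs (along the channel axis by default) back onto the main stream. The shapes line up because every branch reduces identically; assuming the standard 299x299 input, Mixed_7a sees 17x17 maps, and each branch, stride-2 convolution and max pool alike, produces (17 - 3)/2 + 1 = 8, so the four 8x8 outputs concatenate cleanly. A minimal sketch of the idiom, with illustrative layers that are not taken from the model:

    #include "arm_compute/graph.h"

    using namespace arm_compute;
    using namespace arm_compute::graph::frontend;

    void add_two_branch_block(Stream &graph, DataLayout layout)
    {
        SubStream branch_a(graph); // forks at the current tail of `graph`
        branch_a << PoolingLayer(
            PoolingLayerInfo(PoolingType::MAX, 3, layout, PadStrideInfo(1, 1, 1, 1)));

        SubStream branch_b(graph); // forks at the same point
        branch_b << ActivationLayer(
            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

        // Join the branches and continue building on the main stream.
        graph << ConcatLayer(std::move(branch_a), std::move(branch_b));
    }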
"Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/Relu") << ConvolutionLayer(3U, 3U, 32U, - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_3x3_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_3x3_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) - .set_name(unit_name + "Branch_1/Conv2d_0b_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name(unit_name + "Branch_1/Conv2d_0b_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_1/Conv2d_0b_3x3/Relu"); + .set_name(unit_name + "Branch_1/Conv2d_0b_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name(unit_name + "Branch_1/Conv2d_0b_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_1/Conv2d_0b_3x3/Relu"); // Branch 2 SubStream i_lc(i_l); i_lc << ConvolutionLayer(1U, 1U, 32U, - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0a_1x1_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Branch_2/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name(unit_name + "Branch_2/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_2/Conv2d_0a_1x1/Relu") + .set_name(unit_name + "Branch_2/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name(unit_name + "Branch_2/Conv2d_0a_1x1/BatchNorm") + << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_2/Conv2d_0a_1x1/Relu") << ConvolutionLayer(3U, 3U, 48U, - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0b_3x3_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) - .set_name(unit_name + "Branch_2/Conv2d_0b_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name(unit_name + "Branch_2/Conv2d_0b_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_2/Conv2d_0b_3x3/Relu") + .set_name(unit_name + "Branch_2/Conv2d_0b_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name(unit_name + "Branch_2/Conv2d_0b_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_2/Conv2d_0b_3x3/Relu") << ConvolutionLayer(3U, 3U, 64U, - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0c_3x3_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0c_3x3_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) - .set_name(unit_name + "Branch_2/Conv2d_0c_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name(unit_name + "Branch_2/Conv2d_0c_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_2/Conv2d_0c_3x3/Relu"); + .set_name(unit_name + "Branch_2/Conv2d_0c_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name(unit_name + "Branch_2/Conv2d_0c_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_2/Conv2d_0c_3x3/Relu"); // Concatenate i_l << ConcatLayer(std::move(i_la), std::move(i_lb), std::move(i_lc)).set_name(unit_name + "concat") - << ConvolutionLayer(1U, 1U, 320U, - 
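Branch_2 above reaches a 5x5 receptive field by stacking two 3x3 convolutions (0b and 0c) behind the 1x1 bottleneck, the standard Inception factorisation: the receptive field matches a direct 5x5 kernel (3 + 3 - 1 = 5), the per-pixel cost per input/output channel pair drops from 5 x 5 = 25 multiplies to 2 x 3 x 3 = 18, and an extra batch-norm/ReLU nonlinearity is gained in between.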
get_weights_accessor(data_path, unit_path + "Conv2d_1x1_weights.npy", weights_layout), - get_weights_accessor(data_path, unit_path + "Conv2d_1x1_biases.npy", weights_layout), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Conv2d_1x1/convolution") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 0.17f, 0.f)).set_name(unit_name + "mul"); + << ConvolutionLayer( + 1U, 1U, 320U, + get_weights_accessor(data_path, unit_path + "Conv2d_1x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Conv2d_1x1_biases.npy", weights_layout), + PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "Conv2d_1x1/convolution") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 0.17f, 0.f)) + .set_name(unit_name + "mul"); graph << EltwiseLayer(std::move(i_l), std::move(i_r), EltwiseOperation::Add).set_name(unit_name + "add") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Relu"); + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Relu"); } } void block17_repeat(const std::string &data_path, DataLayout weights_layout, unsigned int num_blocks) { - for(unsigned int i = 0; i < num_blocks; ++i) + for (unsigned int i = 0; i < num_blocks; ++i) { std::stringstream unit_path_ss; unit_path_ss << "Repeat_1_block17_" << (i + 1) << "_"; @@ -613,79 +687,101 @@ private: // Branch 0 SubStream i_la(i_l); - i_la << ConvolutionLayer(1U, 1U, 192U, - get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Branch_0/Conv2d_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name(unit_name + "Branch_0/Conv2d_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_0/Conv2d_1x1/Relu"); + i_la << ConvolutionLayer( + 1U, 1U, 192U, + get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "Branch_0/Conv2d_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name(unit_name + "Branch_0/Conv2d_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_0/Conv2d_1x1/Relu"); // Branch 1 SubStream i_lb(i_l); i_lb << ConvolutionLayer(1U, 1U, 128U, - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_weights.npy", + weights_layout), 
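The "mul" node that closes each block35 unit deserves a note: ACL's LINEAR activation computes f(x) = a*x + b, so ActivationLayerInfo(LINEAR, 0.17f, 0.f) multiplies the concatenated branch output by 0.17 before the EltwiseLayer adds it to the untouched shortcut i_r. This is the residual scaling from the Inception-ResNet paper; block17 below uses 0.10, and block8 takes the factor as a parameter. Schematically, each unit computes out = ReLU(x + scale * Conv1x1(Concat(branches(x)))), with scale = 0.17 here.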
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_1/Conv2d_0a_1x1/Relu") + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/Relu") << ConvolutionLayer(7U, 1U, 160U, - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x7_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0)) - .set_name(unit_name + "Branch_1/Conv2d_0b_1x7/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name(unit_name + "Branch_1/Conv2d_0b_1x7/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_1/Conv2d_0b_1x7/Relu") + .set_name(unit_name + "Branch_1/Conv2d_0b_1x7/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name(unit_name + "Branch_1/Conv2d_0b_1x7/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_1/Conv2d_0b_1x7/Relu") << ConvolutionLayer(1U, 7U, 192U, - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_7x1_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3)) - .set_name(unit_name + "Branch_1/Conv2d_0c_7x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"), - 
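Note the factorised spatial convolutions in block17: a 7-wide by 1-tall pass followed by a 1-wide by 7-tall pass. In the frontend API the first two ConvolutionLayer arguments are kernel width and height, and PadStrideInfo is (stride_x, stride_y, pad_x, pad_y), so PadStrideInfo(1, 1, 3, 0) pads three columns for the 7-wide kernel and PadStrideInfo(1, 1, 0, 3) pads three rows for the 7-tall one, each preserving the feature-map size. The arithmetic, assuming symmetric padding and FLOOR rounding:

    // out = (in + 2*pad - kernel) / stride + 1
    // e.g. a 17-wide map through the 7-wide kernel: (17 + 2*3 - 7) / 1 + 1 = 17
    unsigned int conv_out_dim(unsigned int in, unsigned int kernel,
                              unsigned int stride, unsigned int pad)
    {
        return (in + 2 * pad - kernel) / stride + 1;
    }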
get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name(unit_name + "Branch_1/Conv2d_0c_7x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_1/Conv2d_0c_7x1/Relu"); + .set_name(unit_name + "Branch_1/Conv2d_0c_7x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name(unit_name + "Branch_1/Conv2d_0c_7x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_1/Conv2d_0c_7x1/Relu"); // Concatenate i_l << ConcatLayer(std::move(i_la), std::move(i_lb)).set_name(unit_name + "concat") - << ConvolutionLayer(1U, 1U, 1088U, - get_weights_accessor(data_path, unit_path + "Conv2d_1x1_weights.npy", weights_layout), - get_weights_accessor(data_path, unit_path + "Conv2d_1x1_biases.npy", weights_layout), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Conv2d_1x1/convolution") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 0.10f, 0.f)).set_name(unit_name + "mul"); + << ConvolutionLayer( + 1U, 1U, 1088U, + get_weights_accessor(data_path, unit_path + "Conv2d_1x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Conv2d_1x1_biases.npy", weights_layout), + PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "Conv2d_1x1/convolution") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 0.10f, 0.f)) + .set_name(unit_name + "mul"); graph << EltwiseLayer(std::move(i_l), std::move(i_r), EltwiseOperation::Add).set_name(unit_name + "add") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Relu"); + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Relu"); } } - void block8_repeat(const std::string &data_path, DataLayout weights_layout, unsigned int num_blocks, float scale, bool has_activation) + void block8_repeat(const std::string &data_path, + DataLayout weights_layout, + unsigned int num_blocks, + float scale, + bool has_activation) { - for(unsigned int i = 0; i < num_blocks; ++i) + for (unsigned int i = 0; i < num_blocks; ++i) { std::stringstream unit_path_ss; std::stringstream unit_name_ss; - if(num_blocks != 1) + if (num_blocks != 1) { unit_path_ss << "Repeat_2_block8_" << (i + 1) << "_"; unit_name_ss << "Repeat_2/block8_" << (i + 1) << "/"; @@ -705,79 +801,97 @@ private: // Branch 0 SubStream i_la(i_l); - i_la << ConvolutionLayer(1U, 1U, 192U, - get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Branch_0/Conv2d_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + 
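block8_repeat's reflowed signature also surfaces its two extra knobs, scale and has_activation: the guards later in this function scale the residual only when scale != 1.f and append the trailing ReLU only when requested, and the num_blocks != 1 check drops the "Repeat_2/" name prefix when a single unit is built. The call sites are outside this hunk, so the following is only an illustration of the intended pattern, a run of scaled units followed by one final unscaled, activation-free unit:

    // Hypothetical call pattern; the real arguments live outside this hunk.
    block8_repeat(data_path, weights_layout, 9, 0.2f, true);  // repeated, scaled, with ReLU
    block8_repeat(data_path, weights_layout, 1, 1.f, false);  // final unit: no scale, no ReLU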
"Branch_0_Conv2d_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name(unit_name + "Branch_0/Conv2d_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_0/Conv2d_1x1/Relu"); + i_la << ConvolutionLayer( + 1U, 1U, 192U, + get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "Branch_0/Conv2d_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name(unit_name + "Branch_0/Conv2d_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_0/Conv2d_1x1/Relu"); // Branch 1 SubStream i_lb(i_l); i_lb << ConvolutionLayer(1U, 1U, 192U, - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_1/Conv2d_0a_1x1/Relu") + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/Relu") << ConvolutionLayer(3U, 1U, 224U, - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x3_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x3_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 0)) - .set_name(unit_name + "Branch_1/Conv2d_0b_1x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_variance.npy"), - 
get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name(unit_name + "Branch_1/Conv2d_0b_1x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_1/Conv2d_0b_1x3/Relu") + .set_name(unit_name + "Branch_1/Conv2d_0b_1x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name(unit_name + "Branch_1/Conv2d_0b_1x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_1/Conv2d_0b_1x3/Relu") << ConvolutionLayer(1U, 3U, 256U, - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_3x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_3x1_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 1)) - .set_name(unit_name + "Branch_1/Conv2d_0c_3x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name(unit_name + "Branch_1/Conv2d_0c_3x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_1/Conv2d_0c_3x1/Relu"); + .set_name(unit_name + "Branch_1/Conv2d_0c_3x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name(unit_name + "Branch_1/Conv2d_0c_3x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_1/Conv2d_0c_3x1/Relu"); // Concatenate i_l << ConcatLayer(std::move(i_la), std::move(i_lb)).set_name(unit_name + "concat") - << ConvolutionLayer(1U, 1U, 2080U, - get_weights_accessor(data_path, unit_path + "Conv2d_1x1_weights.npy", weights_layout), - get_weights_accessor(data_path, unit_path + "Conv2d_1x1_biases.npy", weights_layout), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Conv2d_1x1/convolution"); + << ConvolutionLayer( + 1U, 1U, 2080U, + get_weights_accessor(data_path, unit_path + "Conv2d_1x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Conv2d_1x1_biases.npy", weights_layout), + PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "Conv2d_1x1/convolution"); // Scale result - if(scale != 1.f) + if (scale != 1.f) { - i_l << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, scale, 0.f)).set_name(unit_name + "mul"); + i_l << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, scale, 0.f)) + .set_name(unit_name + "mul"); } // Residual add graph << EltwiseLayer(std::move(i_l), std::move(i_r), EltwiseOperation::Add).set_name(unit_name + "add"); // Apply activation if needed - if(has_activation) + if (has_activation) { - graph << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Relu"); + graph << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Relu"); } } } diff --git a/examples/graph_inception_v3.cpp b/examples/graph_inception_v3.cpp index 03d5dff9be..75e03fb6b3 100644 --- a/examples/graph_inception_v3.cpp +++ b/examples/graph_inception_v3.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,7 +21,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Utils.h" #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -35,8 +38,7 @@ using namespace arm_compute::graph_utils; class InceptionV3Example : public Example { public: - InceptionV3Example() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "InceptionV3") + InceptionV3Example() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "InceptionV3") { } bool do_setup(int argc, char **argv) override @@ -49,7 +51,7 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; @@ -62,147 +64,177 @@ public: std::string data_path = common_params.data_path; // Create a preprocessor object - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<TFPreproccessor>(); + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<TFPreproccessor>(); // Create input descriptor const auto operation_layout = common_params.data_layout; - const TensorShape tensor_shape = permute_shape(TensorShape(299U, 299U, 3U, 1U), DataLayout::NCHW, operation_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(299U, 299U, 3U, common_params.batches), DataLayout::NCHW, operation_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NCHW; - graph << common_params.target - << common_params.fast_math_hint - << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), false)) - << ConvolutionLayer(3U, 3U, 32U, - get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) - .set_name("Conv2d_1a_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, - "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, - "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - nullptr, get_weights_accessor(data_path, 
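Beyond reformatting, the graph_inception_v3.cpp hunks above carry real substance: arm_compute::support::cpp14::make_unique gives way to std::make_unique (the library now assumes a C++14-or-later toolchain), and the input shape picks up the command-line batch size, TensorShape(299U, 299U, 3U, common_params.batches), instead of a hard-coded batch of 1, with permute_shape converting that NCHW-ordered shape to whatever layout was requested. The TFPreproccessor (the misspelling is the class's actual name in utils/GraphUtils.h) rescales inputs to the range TF-slim checkpoints expect; with its default [-1, 1] range that is, per element, roughly:

    #include <cstdint>

    // Sketch of the default TFPreproccessor mapping: [0, 255] -> [-1, 1].
    float tf_preprocess(uint8_t v)
    {
        return (v / 255.0f) * 2.0f - 1.0f;
    }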
- "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name("Conv2d_1a_3x3/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_1a_3x3/Relu") - << ConvolutionLayer(3U, 3U, 32U, - get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name("Conv2d_2a_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, - "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, - "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_BatchNorm_moving_variance.npy"), - nullptr, get_weights_accessor(data_path, - "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name("Conv2d_2a_3x3/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_2a_3x3/Relu") - - << ConvolutionLayer(3U, 3U, 64U, - get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) - .set_name("Conv2d_2b_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, - "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, - "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_BatchNorm_moving_variance.npy"), - nullptr, get_weights_accessor(data_path, - "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name("Conv2d_2b_3x3/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_2b_3x3/Relu") - - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("MaxPool_3a_3x3/MaxPool") - - << ConvolutionLayer(1U, 1U, 80U, - get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_3b_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name("Conv2d_3b_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, - "/cnn_data/inceptionv3_model/Conv2d_3b_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, - "/cnn_data/inceptionv3_model/Conv2d_3b_1x1_BatchNorm_moving_variance.npy"), - nullptr, get_weights_accessor(data_path, - "/cnn_data/inceptionv3_model/Conv2d_3b_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name("Conv2d_3b_1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_3b_1x1/Relu") - - << ConvolutionLayer(3U, 3U, 192U, - get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name("Conv2d_4a_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, - "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, - "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_BatchNorm_moving_variance.npy"), - nullptr, get_weights_accessor(data_path, - "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name("Conv2d_4a_3x3/BatchNorm/batchnorm") - << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_4a_3x3/Relu") - - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("MaxPool_5a_3x3/MaxPool"); - - graph << get_inception_node_A(data_path, "Mixed_5b", weights_layout, 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U), - 32U) - .set_name("Mixed_5b/concat"); - graph << get_inception_node_A(data_path, "Mixed_5c", weights_layout, 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U), - 64U, true) - .set_name("Mixed_5c/concat"); - graph << get_inception_node_A(data_path, "Mixed_5d", weights_layout, 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U), - 64U) - .set_name("Mixed_5d/concat"); - - graph << get_inception_node_B(data_path, "Mixed_6a", weights_layout, 384U, std::make_tuple(64U, 96U, 96U)).set_name("Mixed_6a/concat"); + graph + << common_params.target << common_params.fast_math_hint + << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), false)) + << ConvolutionLayer(3U, 3U, 32U, + get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_weights.npy", + weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(2, 2, 0, 0)) + .set_name("Conv2d_1a_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, + "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + nullptr, + get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_BatchNorm_beta.npy"), + 0.001f) + .set_name("Conv2d_1a_3x3/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_1a_3x3/Relu") + << ConvolutionLayer(3U, 3U, 32U, + get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_weights.npy", + weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(1, 1, 0, 0)) + .set_name("Conv2d_2a_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, + "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_BatchNorm_moving_variance.npy"), + nullptr, + get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_BatchNorm_beta.npy"), + 0.001f) + .set_name("Conv2d_2a_3x3/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_2a_3x3/Relu") + + << ConvolutionLayer(3U, 3U, 64U, + get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_weights.npy", + weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(1, 1, 1, 1)) + .set_name("Conv2d_2b_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, + "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_BatchNorm_moving_variance.npy"), + nullptr, + get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_BatchNorm_beta.npy"), + 0.001f) + .set_name("Conv2d_2b_3x3/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) 
+ .set_name("Conv2d_2b_3x3/Relu") + + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + .set_name("MaxPool_3a_3x3/MaxPool") + + << ConvolutionLayer(1U, 1U, 80U, + get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_3b_1x1_weights.npy", + weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(1, 1, 0, 0)) + .set_name("Conv2d_3b_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, + "/cnn_data/inceptionv3_model/Conv2d_3b_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + "/cnn_data/inceptionv3_model/Conv2d_3b_1x1_BatchNorm_moving_variance.npy"), + nullptr, + get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_3b_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name("Conv2d_3b_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_3b_1x1/Relu") + + << ConvolutionLayer(3U, 3U, 192U, + get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_weights.npy", + weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(1, 1, 0, 0)) + .set_name("Conv2d_4a_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, + "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_BatchNorm_moving_variance.npy"), + nullptr, + get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_BatchNorm_beta.npy"), + 0.001f) + .set_name("Conv2d_4a_3x3/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_4a_3x3/Relu") + + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + .set_name("MaxPool_5a_3x3/MaxPool"); + + graph << get_inception_node_A(data_path, "Mixed_5b", weights_layout, 64U, std::make_tuple(48U, 64U), + std::make_tuple(64U, 96U, 96U), 32U) + .set_name("Mixed_5b/concat"); + graph << get_inception_node_A(data_path, "Mixed_5c", weights_layout, 64U, std::make_tuple(48U, 64U), + std::make_tuple(64U, 96U, 96U), 64U, true) + .set_name("Mixed_5c/concat"); + graph << get_inception_node_A(data_path, "Mixed_5d", weights_layout, 64U, std::make_tuple(48U, 64U), + std::make_tuple(64U, 96U, 96U), 64U) + .set_name("Mixed_5d/concat"); + + graph << get_inception_node_B(data_path, "Mixed_6a", weights_layout, 384U, std::make_tuple(64U, 96U, 96U)) + .set_name("Mixed_6a/concat"); graph << get_inception_node_C(data_path, "Mixed_6b", weights_layout, 192U, std::make_tuple(128U, 128U, 192U), std::make_tuple(128U, 128U, 128U, 128U, 192U), 192U) - .set_name("Mixed_6b/concat"); + .set_name("Mixed_6b/concat"); graph << get_inception_node_C(data_path, "Mixed_6c", weights_layout, 192U, std::make_tuple(160U, 160U, 192U), std::make_tuple(160U, 160U, 160U, 160U, 192U), 192U) - .set_name("Mixed_6c/concat"); + .set_name("Mixed_6c/concat"); graph << get_inception_node_C(data_path, "Mixed_6d", weights_layout, 192U, std::make_tuple(160U, 160U, 192U), std::make_tuple(160U, 160U, 160U, 160U, 192U), 192U) - .set_name("Mixed_6d/concat"); + .set_name("Mixed_6d/concat"); graph << get_inception_node_C(data_path, "Mixed_6e", weights_layout, 192U, std::make_tuple(192U, 192U, 192U), std::make_tuple(192U, 192U, 192U, 192U, 192U), 192U) - 
.set_name("Mixed_6e/concat"); + .set_name("Mixed_6e/concat"); graph << get_inception_node_D(data_path, "Mixed_7a", weights_layout, std::make_tuple(192U, 320U), std::make_tuple(192U, 192U, 192U, 192U)) - .set_name("Mixed_7a/concat"); + .set_name("Mixed_7a/concat"); graph << get_inception_node_E(data_path, "Mixed_7b", weights_layout, 320U, std::make_tuple(384U, 384U, 384U), std::make_tuple(448U, 384U, 384U, 384U), 192U) - .set_name("Mixed_7b/concat"); + .set_name("Mixed_7b/concat"); graph << get_inception_node_E(data_path, "Mixed_7c", weights_layout, 320U, std::make_tuple(384U, 384U, 384U), std::make_tuple(448U, 384U, 384U, 384U), 192U, true) - .set_name("Mixed_7c/concat"); - - graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 8, operation_layout, PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL))).set_name("Logits/AvgPool_1a_8x8/AvgPool") - << ConvolutionLayer(1U, 1U, 1001U, get_weights_accessor(data_path, - "/cnn_data/inceptionv3_model/Logits_Conv2d_1c_1x1_weights.npy", weights_layout), - get_weights_accessor(data_path, - "/cnn_data/inceptionv3_model/Logits_Conv2d_1c_1x1_biases.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Logits/Conv2d_1c_1x1/convolution") + .set_name("Mixed_7c/concat"); + + graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 8, operation_layout, + PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL))) + .set_name("Logits/AvgPool_1a_8x8/AvgPool") + << ConvolutionLayer( + 1U, 1U, 1001U, + get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Logits_Conv2d_1c_1x1_weights.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Logits_Conv2d_1c_1x1_biases.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("Logits/Conv2d_1c_1x1/convolution") << ReshapeLayer(TensorShape(1001U)).set_name("Predictions/Reshape") - << SoftmaxLayer().set_name("Predictions/Softmax") - << OutputLayer(get_output_accessor(common_params, 5)); + << SoftmaxLayer().set_name("Predictions/Softmax") << OutputLayer(get_output_accessor(common_params, 5)); // Finalize graph GraphConfig config; - config.num_threads = common_params.threads; - config.use_tuner = common_params.enable_tuner; - config.tuner_mode = common_params.tuner_mode; - config.tuner_file = common_params.tuner_file; - config.convert_to_uint8 = (common_params.data_type == DataType::QASYMM8); - + config.num_threads = common_params.threads; + config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; + config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; + config.use_synthetic_type = arm_compute::is_data_type_quantized(common_params.data_type); + config.synthetic_type = common_params.data_type; graph.finalize(common_params.target, config); return true; @@ -220,19 +252,21 @@ private: Stream graph; private: - ConcatLayer get_inception_node_A(const std::string &data_path, std::string &¶m_path, DataLayout weights_layout, - unsigned int a_filt, - std::tuple<unsigned int, unsigned int> b_filters, + ConcatLayer get_inception_node_A(const std::string &data_path, + std::string &¶m_path, + DataLayout weights_layout, + unsigned int a_filt, + std::tuple<unsigned int, unsigned int> b_filters, std::tuple<unsigned int, unsigned int, unsigned int> c_filters, - unsigned int d_filt, - bool is_name_different = false) + unsigned int d_filt, + bool is_name_different = false) { std::string total_path = "/cnn_data/inceptionv3_model/" + param_path + "_"; // This is due to a naming issue in the tf model std::string conv_id0 = "_0a_"; std::string 
conv_id1 = "2d_0b_"; - if(is_name_different) + if (is_name_different) { conv_id0 = "_0b_"; conv_id1 = "_1_0c_"; @@ -240,457 +274,451 @@ private: SubStream i_a(graph); i_a << ConvolutionLayer( - 1U, 1U, a_filt, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu"); + 1U, 1U, a_filt, + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu"); SubStream i_b(graph); i_b << ConvolutionLayer( - 1U, 1U, std::get<0>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id0 + "1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_1/Conv2d" + conv_id0 + "1x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id0 + "1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id0 + "1x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id0 + "1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d" + conv_id0 + "1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d" + conv_id0 + "1x1/Relu") + 1U, 1U, std::get<0>(b_filters), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id0 + "1x1_weights.npy", + weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_1/Conv2d" + conv_id0 + "1x1/convolution") + << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id0 + + "1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id0 + + "1x1_BatchNorm_moving_variance.npy"), + nullptr, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id0 + + 
"1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d" + conv_id0 + "1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d" + conv_id0 + "1x1/Relu") << ConvolutionLayer( - 5U, 5U, std::get<1>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv" + conv_id1 + "5x5_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 2, 2)) - .set_name(param_path + "/Branch_1/Conv2d" + conv_id1 + "5x5/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv" + conv_id1 + "5x5_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv" + conv_id1 + "5x5_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_1_Conv" + conv_id1 + "5x5_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d" + conv_id1 + "5x5/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d" + conv_id1 + "5x5/Relu"); + 5U, 5U, std::get<1>(b_filters), + get_weights_accessor(data_path, total_path + "Branch_1_Conv" + conv_id1 + "5x5_weights.npy", + weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 2, 2)) + .set_name(param_path + "/Branch_1/Conv2d" + conv_id1 + "5x5/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, + total_path + "Branch_1_Conv" + conv_id1 + "5x5_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + total_path + "Branch_1_Conv" + conv_id1 + "5x5_BatchNorm_moving_variance.npy"), + nullptr, + get_weights_accessor(data_path, total_path + "Branch_1_Conv" + conv_id1 + "5x5_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d" + conv_id1 + "5x5/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d" + conv_id1 + "5x5/Relu"); SubStream i_c(graph); i_c << ConvolutionLayer( - 1U, 1U, std::get<0>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu") + 1U, 1U, std::get<0>(c_filters), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + 
"Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu") << ConvolutionLayer( - 3U, 3U, std::get<1>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0b_3x3/Relu") + 3U, 3U, std::get<1>(c_filters), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/Relu") << ConvolutionLayer( - 3U, 3U, std::get<2>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name(param_path + "/Branch_2/Conv2d_0c_3x3/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0c_3x3/BatchNorm/batcnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0c_3x3/Relu"); + 3U, 3U, std::get<2>(c_filters), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name(param_path + "/Branch_2/Conv2d_0c_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_variance.npy"), + nullptr, 
get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0c_3x3/BatchNorm/batcnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0c_3x3/Relu"); SubStream i_d(graph); - i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, common_params.data_layout, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), - true)) - .set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool") + i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, common_params.data_layout, + PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)) + .set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool") << ConvolutionLayer( - 1U, 1U, d_filt, - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Relu"); + 1U, 1U, d_filt, + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Relu"); return ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)); } - ConcatLayer get_inception_node_B(const std::string &data_path, std::string &¶m_path, DataLayout weights_layout, - unsigned int a_filt, + ConcatLayer get_inception_node_B(const std::string &data_path, + std::string &¶m_path, + DataLayout weights_layout, + unsigned int a_filt, std::tuple<unsigned int, unsigned int, unsigned int> b_filters) { std::string total_path = "/cnn_data/inceptionv3_model/" + param_path + "_"; SubStream i_a(graph); i_a << ConvolutionLayer( - 3U, 3U, a_filt, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 0)) - .set_name(param_path + "/Branch_0/Conv2d_1a_1x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_1x1_BatchNorm_moving_variance.npy"), - nullptr, - 
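// ---------------------------------------------------------------------------
// Editor's note (not part of the upstream commit): every branch in these
// inception node builders repeats the same Conv2D -> BatchNorm -> ReLU
// triplet, differing only in kernel size, padding and the .npy name stem.
// A minimal sketch of a helper that factors the triplet out is shown below;
// the helper name add_conv_bn_relu, the Stream template parameter and the
// caller-built name strings are editorial assumptions, not ACL API.
template <typename Stream>
void add_conv_bn_relu(Stream            &stream,
                      const std::string &data_path,
                      const std::string &total_path,   // e.g. "/cnn_data/inceptionv3_model/<param_path>_"
                      const std::string &unit,         // e.g. "Branch_0_Conv2d_0a_1x1"
                      const std::string &name,         // e.g. "<param_path>/Branch_0/Conv2d_0a_1x1"
                      unsigned int       width,
                      unsigned int       height,
                      unsigned int       ofm,
                      PadStrideInfo      pad_stride,
                      DataLayout         weights_layout)
{
    stream << ConvolutionLayer(width, height, ofm,
                               get_weights_accessor(data_path, total_path + unit + "_weights.npy", weights_layout),
                               std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), // no bias
                               pad_stride)
                  .set_name(name + "/convolution")
           << BatchNormalizationLayer(
                  get_weights_accessor(data_path, total_path + unit + "_BatchNorm_moving_mean.npy"),
                  get_weights_accessor(data_path, total_path + unit + "_BatchNorm_moving_variance.npy"),
                  nullptr, // no gamma: these checkpoints store beta only
                  get_weights_accessor(data_path, total_path + unit + "_BatchNorm_beta.npy"),
                  0.001f)  // epsilon used throughout this example
                  .set_name(name + "/BatchNorm/batchnorm")
           << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
                  .set_name(name + "/Relu");
}
// ---------------------------------------------------------------------------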
get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_0/Conv2d_1a_1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_1a_1x1/Relu"); + 3U, 3U, a_filt, + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name(param_path + "/Branch_0/Conv2d_1a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_1x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_0/Conv2d_1a_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_0/Conv2d_1a_1x1/Relu"); SubStream i_b(graph); i_b << ConvolutionLayer( - 1U, 1U, std::get<0>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu") + 1U, 1U, std::get<0>(b_filters), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu") << ConvolutionLayer( - 3U, 3U, std::get<1>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name(param_path + "/Branch_1/Conv2d_0b_3x3/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + 
"Branch_1_Conv2d_0b_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0b_3x3/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0b_3x3/Relu") + 3U, 3U, std::get<1>(b_filters), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name(param_path + "/Branch_1/Conv2d_0b_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0b_3x3/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_0b_3x3/Relu") << ConvolutionLayer( - 3U, 3U, std::get<2>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 0)) - .set_name(param_path + "/Branch_1/Conv2d_1a_1x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_1a_1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_1a_1x1/Relu"); + 3U, 3U, std::get<2>(b_filters), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name(param_path + "/Branch_1/Conv2d_1a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_1a_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_1a_1x1/Relu"); SubStream i_c(graph); - i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name(param_path + "/Branch_2/MaxPool_1a_3x3/MaxPool"); + i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + .set_name(param_path + "/Branch_2/MaxPool_1a_3x3/MaxPool"); return ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c)); } - ConcatLayer get_inception_node_C(const std::string &data_path, std::string &¶m_path, DataLayout weights_layout, - unsigned int a_filt, - std::tuple<unsigned int, unsigned int, unsigned 
int> b_filters, - std::tuple<unsigned int, unsigned int, unsigned int, unsigned int, unsigned int> c_filters, - unsigned int d_filt) + ConcatLayer + get_inception_node_C(const std::string &data_path, + std::string &¶m_path, + DataLayout weights_layout, + unsigned int a_filt, + std::tuple<unsigned int, unsigned int, unsigned int> b_filters, + std::tuple<unsigned int, unsigned int, unsigned int, unsigned int, unsigned int> c_filters, + unsigned int d_filt) { std::string total_path = "/cnn_data/inceptionv3_model/" + param_path + "_"; SubStream i_a(graph); i_a << ConvolutionLayer( - 1U, 1U, a_filt, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu"); + 1U, 1U, a_filt, + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu"); SubStream i_b(graph); i_b << ConvolutionLayer( - 1U, 1U, std::get<0>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu") + 1U, 1U, std::get<0>(b_filters), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + 
"Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu") << ConvolutionLayer( - 7U, 1U, std::get<1>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 3, 0)) - .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0b_1x7/Relu") + 7U, 1U, std::get<1>(b_filters), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0)) + .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/Relu") << ConvolutionLayer( - 1U, 7U, std::get<2>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 3)) - .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_0c_7x1/Relu"); + 1U, 7U, std::get<2>(b_filters), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3)) + .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"), + 
get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_0/Conv2d_0c_7x1/Relu"); SubStream i_c(graph); i_c << ConvolutionLayer( - 1U, 1U, std::get<0>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu") + 1U, 1U, std::get<0>(c_filters), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu") << ConvolutionLayer( - 1U, 7U, std::get<1>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 3)) - .set_name(param_path + "/Branch_2/Conv2d_0b_7x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0b_7x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0b_7x1/Relu") + 1U, 7U, std::get<1>(c_filters), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3)) + .set_name(param_path + "/Branch_2/Conv2d_0b_7x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + 
"Branch_2_Conv2d_0b_7x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0b_7x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0b_7x1/Relu") << ConvolutionLayer( - 7U, 1U, std::get<2>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 3, 0)) - .set_name(param_path + "/Branch_2/Conv2d_0c_1x7/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0c_1x7/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0c_1x7/Relu") + 7U, 1U, std::get<2>(c_filters), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0)) + .set_name(param_path + "/Branch_2/Conv2d_0c_1x7/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0c_1x7/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0c_1x7/Relu") << ConvolutionLayer( - 1U, 7U, std::get<3>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 3)) - .set_name(param_path + "/Branch_2/Conv2d_0d_7x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0d_7x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0d_7x1/Relu") + 1U, 7U, std::get<3>(c_filters), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3)) + .set_name(param_path + "/Branch_2/Conv2d_0d_7x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_moving_variance.npy"), + nullptr, 
get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0d_7x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0d_7x1/Relu") << ConvolutionLayer( - 7U, 1U, std::get<4>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 3, 0)) - .set_name(param_path + "/Branch_2/Conv2d_0e_1x7/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0e_1x7/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0e_1x7/Relu"); + 7U, 1U, std::get<4>(c_filters), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0)) + .set_name(param_path + "/Branch_2/Conv2d_0e_1x7/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0e_1x7/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0e_1x7/Relu"); SubStream i_d(graph); - i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, common_params.data_layout, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), - true)) - .set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool") + i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, common_params.data_layout, + PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)) + .set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool") << ConvolutionLayer( - 1U, 1U, d_filt, - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Relu"); + 1U, 1U, d_filt, + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), 
PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Relu"); return ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)); } - ConcatLayer get_inception_node_D(const std::string &data_path, std::string &¶m_path, DataLayout weights_layout, - std::tuple<unsigned int, unsigned int> a_filters, + ConcatLayer get_inception_node_D(const std::string &data_path, + std::string &¶m_path, + DataLayout weights_layout, + std::tuple<unsigned int, unsigned int> a_filters, std::tuple<unsigned int, unsigned int, unsigned int, unsigned int> b_filters) { std::string total_path = "/cnn_data/inceptionv3_model/" + param_path + "_"; SubStream i_a(graph); i_a << ConvolutionLayer( - 1U, 1U, std::get<0>(a_filters), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu") + 1U, 1U, std::get<0>(a_filters), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu") << ConvolutionLayer( - 3U, 3U, std::get<1>(a_filters), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 0)) - .set_name(param_path + "/Branch_0/Conv2d_1a_3x3/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - nullptr, 
- get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_0/Conv2d_1a_3x3/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_1a_3x3/Relu"); + 3U, 3U, std::get<1>(a_filters), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name(param_path + "/Branch_0/Conv2d_1a_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_0/Conv2d_1a_3x3/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_0/Conv2d_1a_3x3/Relu"); SubStream i_b(graph); i_b << ConvolutionLayer( - 1U, 1U, std::get<0>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu") + 1U, 1U, std::get<0>(b_filters), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu") << ConvolutionLayer( - 7U, 1U, std::get<1>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 3, 0)) - .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + 
"Branch_1_Conv2d_0b_1x7_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0b_1x7/Relu") + 7U, 1U, std::get<1>(b_filters), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0)) + .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/Relu") << ConvolutionLayer( - 1U, 7U, std::get<2>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 3)) - .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0c_7x1/Relu") + 1U, 7U, std::get<2>(b_filters), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3)) + .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/Relu") << ConvolutionLayer( - 3U, 3U, std::get<3>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 0)) - .set_name(param_path + "/Branch_1/Conv2d_1a_3x3/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + 
"/Branch_1/Conv2d_1a_3x3/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_1a_3x3/Relu"); + 3U, 3U, std::get<3>(b_filters), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name(param_path + "/Branch_1/Conv2d_1a_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_1a_3x3/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_1a_3x3/Relu"); SubStream i_c(graph); - i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name(param_path + "/Branch_2/MaxPool_1a_3x3/MaxPool"); + i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + .set_name(param_path + "/Branch_2/MaxPool_1a_3x3/MaxPool"); return ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c)); } - ConcatLayer get_inception_node_E(const std::string &data_path, std::string &¶m_path, DataLayout weights_layout, - unsigned int a_filt, - std::tuple<unsigned int, unsigned int, unsigned int> b_filters, + ConcatLayer get_inception_node_E(const std::string &data_path, + std::string &¶m_path, + DataLayout weights_layout, + unsigned int a_filt, + std::tuple<unsigned int, unsigned int, unsigned int> b_filters, std::tuple<unsigned int, unsigned int, unsigned int, unsigned int> c_filters, - unsigned int d_filt, - bool is_name_different = false) + unsigned int d_filt, + bool is_name_different = false) { // This is due to a naming issue in the tf model std::string conv_id = "_0b_"; - if(is_name_different) + if (is_name_different) { conv_id = "_0c_"; } @@ -698,154 +726,152 @@ private: std::string total_path = "/cnn_data/inceptionv3_model/" + param_path + "_"; SubStream i_a(graph); i_a << ConvolutionLayer( - 1U, 1U, a_filt, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu"); + 1U, 1U, a_filt, + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + 
"/Branch_0/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu"); SubStream i_b(graph); i_b << ConvolutionLayer( - 1U, 1U, std::get<0>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu"); + 1U, 1U, std::get<0>(b_filters), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu"); SubStream i_b1(i_b); i_b1 << ConvolutionLayer( - 3U, 1U, std::get<1>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 0)) - .set_name(param_path + "/Branch_1/Conv2d_0b_1x3/convolution") + 3U, 1U, std::get<1>(b_filters), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 0)) + .set_name(param_path + "/Branch_1/Conv2d_0b_1x3/convolution") << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0b_1x3/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + 
"/Branch_1/Conv2d_0b_1x3/Relu"); + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0b_1x3/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_0b_1x3/Relu"); SubStream i_b2(i_b); i_b2 << ConvolutionLayer( - 1U, 3U, std::get<2>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id + "3x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 1)) - .set_name(param_path + "/Branch_1/Conv2d" + conv_id + "3x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id + "3x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id + "3x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id + "3x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d" + conv_id + "3x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d" + conv_id + "3x1/Relu"); + 1U, 3U, std::get<2>(b_filters), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id + "3x1_weights.npy", + weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 1)) + .set_name(param_path + "/Branch_1/Conv2d" + conv_id + "3x1/convolution") + << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id + + "3x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id + + "3x1_BatchNorm_moving_variance.npy"), + nullptr, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id + + "3x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d" + conv_id + "3x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d" + conv_id + "3x1/Relu"); // Merge b1 and b2 i_b << ConcatLayer(std::move(i_b1), std::move(i_b2)).set_name(param_path + "/Branch_1/concat"); SubStream i_c(graph); i_c << ConvolutionLayer( - 1U, 1U, std::get<0>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu") + 1U, 1U, std::get<0>(c_filters), + 
get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu") << ConvolutionLayer( - 3U, 3U, std::get<1>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0b_3x3/Relu"); + 3U, 3U, std::get<1>(c_filters), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/Relu"); SubStream i_c1(i_c); i_c1 << ConvolutionLayer( - 3U, 1U, std::get<2>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 0)) - .set_name(param_path + "/Branch_2/Conv2d_0c_1x3/convolution") + 3U, 1U, std::get<2>(c_filters), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 0)) + .set_name(param_path + "/Branch_2/Conv2d_0c_1x3/convolution") << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_BatchNorm_beta.npy"), - 0.001f) - 
.set_name(param_path + "/Branch_2/Conv2d_0c_1x3/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0c_1x3/Relu"); + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + total_path + "Branch_2_Conv2d_0c_1x3_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0c_1x3/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0c_1x3/Relu"); SubStream i_c2(i_c); i_c2 << ConvolutionLayer( - 1U, 3U, std::get<3>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_3x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 1)) - .set_name(param_path + "/Branch_2/Conv2d_0d_3x1/convolution") + 1U, 3U, std::get<3>(c_filters), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_3x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 1)) + .set_name(param_path + "/Branch_2/Conv2d_0d_3x1/convolution") << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_3x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_3x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_3x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0d_3x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0d_3x1/Relu"); + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_3x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + total_path + "Branch_2_Conv2d_0d_3x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_3x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0d_3x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0d_3x1/Relu"); // Merge i_c1 and i_c2 i_c << ConcatLayer(std::move(i_c1), std::move(i_c2)).set_name(param_path + "/Branch_2/concat"); SubStream i_d(graph); - i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, common_params.data_layout, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), - true)) - .set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool") + i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, common_params.data_layout, + PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)) + .set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool") << ConvolutionLayer( - 1U, 1U, d_filt, - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + 
"Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Relu"); + 1U, 1U, d_filt, + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Relu"); return ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)); } diff --git a/examples/graph_inception_v4.cpp b/examples/graph_inception_v4.cpp index c78bbb285a..052498ad38 100644 --- a/examples/graph_inception_v4.cpp +++ b/examples/graph_inception_v4.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -39,8 +39,7 @@ using namespace arm_compute::graph_utils; class InceptionV4Example final : public Example { public: - InceptionV4Example() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "InceptionV4") + InceptionV4Example() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "InceptionV4") { } bool do_setup(int argc, char **argv) override @@ -53,7 +52,7 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; @@ -66,55 +65,74 @@ public: std::string data_path = common_params.data_path; // Create a preprocessor object - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<TFPreproccessor>(); + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<TFPreproccessor>(); // Create input descriptor const auto operation_layout = common_params.data_layout; - const TensorShape tensor_shape = permute_shape(TensorShape(299U, 299U, 3U, 1U), DataLayout::NCHW, operation_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(299U, 299U, 3U, common_params.batches), DataLayout::NCHW, operation_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NCHW; - graph << common_params.target - << common_params.fast_math_hint + graph << common_params.target << common_params.fast_math_hint << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), false)) // Conv2d_1a_3x3 - << ConvolutionLayer(3U, 3U, 32U, - get_weights_accessor(data_path, 
"/cnn_data/inceptionv4_model/Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) - .set_name("Conv2d_1a_3x3/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name("Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_1a_3x3/Relu") + << ConvolutionLayer( + 3U, 3U, 32U, + get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_1a_3x3_weights.npy", + weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Conv2d_1a_3x3/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, + "/cnn_data/inceptionv4_model/Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + "/cnn_data/inceptionv4_model/Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_1a_3x3_BatchNorm_beta.npy"), + 0.001f) + .set_name("Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_1a_3x3/Relu") // Conv2d_2a_3x3 - << ConvolutionLayer(3U, 3U, 32U, - get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name("Conv2d_2a_3x3/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2a_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name("Conv2d_2a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_2a_3x3/Relu") + << ConvolutionLayer( + 3U, 3U, 32U, + get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2a_3x3_weights.npy", + weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Conv2d_2a_3x3/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, + "/cnn_data/inceptionv4_model/Conv2d_2a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + "/cnn_data/inceptionv4_model/Conv2d_2a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2a_3x3_BatchNorm_beta.npy"), + 0.001f) + .set_name("Conv2d_2a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_2a_3x3/Relu") // Conv2d_2b_3x3 - << ConvolutionLayer(3U, 3U, 64U, - get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2b_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) - .set_name("Conv2d_2b_3x3/Conv2D") - << 
BatchNormalizationLayer(get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2b_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2b_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name("Conv2d_2b_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_2b_3x3/Relu"); + << ConvolutionLayer( + 3U, 3U, 64U, + get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2b_3x3_weights.npy", + weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("Conv2d_2b_3x3/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, + "/cnn_data/inceptionv4_model/Conv2d_2b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + "/cnn_data/inceptionv4_model/Conv2d_2b_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2b_3x3_BatchNorm_beta.npy"), + 0.001f) + .set_name("Conv2d_2b_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_2b_3x3/Relu"); graph << get_mixed_3a(data_path, weights_layout).set_name("Mixed_3a/concat"); graph << get_mixed_4a(data_path, weights_layout).set_name("Mixed_4a/concat"); @@ -140,27 +158,30 @@ public: graph << get_inceptionC_block(data_path, weights_layout, "Mixed_7b").set_name("Mixed_7b/concat"); graph << get_inceptionC_block(data_path, weights_layout, "Mixed_7c").set_name("Mixed_7c/concat"); graph << get_inceptionC_block(data_path, weights_layout, "Mixed_7d").set_name("Mixed_7d/concat"); - graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, operation_layout)).set_name("Logits/AvgPool_1a/AvgPool") - << FlattenLayer().set_name("Logits/Flatten") - << FullyConnectedLayer( - 1001U, - get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Logits_Logits_weights.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Logits_Logits_biases.npy")) - .set_name("Logits/MatMul") - << SoftmaxLayer().set_name("Logits/Predictions") - << OutputLayer(get_output_accessor(common_params, 5)); + graph + << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, operation_layout)).set_name("Logits/AvgPool_1a/AvgPool") + << FlattenLayer().set_name("Logits/Flatten") + << FullyConnectedLayer( + 1001U, + get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Logits_Logits_weights.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Logits_Logits_biases.npy")) + .set_name("Logits/MatMul") + << SoftmaxLayer().set_name("Logits/Predictions") << OutputLayer(get_output_accessor(common_params, 5)); // Finalize graph GraphConfig config; - config.num_threads = common_params.threads; - config.use_tuner = common_params.enable_tuner; - config.tuner_mode = common_params.tuner_mode; - config.tuner_file = common_params.tuner_file; - config.convert_to_uint8 = (common_params.data_type == DataType::QASYMM8); + config.num_threads = common_params.threads; + config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; + config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; + config.use_synthetic_type = arm_compute::is_data_type_quantized(common_params.data_type); 
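The hunk around this point is where graph_inception_v4.cpp actually changes behaviour rather than just formatting: the input descriptor now honours common_params.batches instead of a hard-coded batch of 1, the QASYMM8-only convert_to_uint8 flag gives way to the generic use_synthetic_type/synthetic_type pair, and an MLGO heuristics file is forwarded to the runtime. A minimal consolidated sketch of the resulting finalization path follows; it assumes the examples' standard scaffolding ("arm_compute/graph.h", "utils/CommonGraphOptions.h"), and finalize_example_graph is a hypothetical helper for illustration, not something this patch adds:

    #include "arm_compute/graph.h"
    #include "utils/CommonGraphOptions.h"

    using arm_compute::graph::GraphConfig;
    using arm_compute::graph::frontend::Stream;
    using arm_compute::utils::CommonGraphParams;

    // Hypothetical consolidation of the GraphConfig block in this hunk.
    void finalize_example_graph(Stream &graph, const CommonGraphParams &common_params)
    {
        GraphConfig config;
        config.num_threads = common_params.threads;
        config.use_tuner   = common_params.enable_tuner; // OpenCL kernel tuner
        config.tuner_mode  = common_params.tuner_mode;
        config.tuner_file  = common_params.tuner_file;
        config.mlgo_file   = common_params.mlgo_file;    // new: MLGO heuristics file
        // Replaces convert_to_uint8: when a quantized data type is requested on
        // the command line, run the graph with that type as a synthetic type.
        config.use_synthetic_type = arm_compute::is_data_type_quantized(common_params.data_type);
        config.synthetic_type     = common_params.data_type;
        graph.finalize(common_params.target, config);
    }

Deriving use_synthetic_type from is_data_type_quantized presumably keeps F32/F16 runs on the real-weights path while letting any quantized run proceed without a dedicated set of quantized .npy files.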
+        config.synthetic_type     = common_params.data_type;

         // Load the precompiled kernels from a file into the kernel library, in this way the next time they are needed
         // compilation won't be required.
-        if(common_params.enable_cl_cache)
+        if (common_params.enable_cl_cache)
         {
 #ifdef ARM_COMPUTE_CL
             restore_program_cache_from_file();
@@ -170,7 +191,7 @@ public:
         graph.finalize(common_params.target, config);

         // Save the opencl kernels to a file
-        if(common_opts.enable_cl_cache)
+        if (common_opts.enable_cl_cache)
         {
 #ifdef ARM_COMPUTE_CL
             save_program_cache_to_file();
@@ -197,22 +218,24 @@ private:
         std::string total_path = "/cnn_data/inceptionv4_model/Mixed_3a_";

         SubStream i_a(graph);
-        i_a << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL),
-                                             true))
-            .set_name("Mixed_3a/Branch_0/MaxPool_0a_3x3/MaxPool");
+        i_a << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout,
+                                             PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true))
+                .set_name("Mixed_3a/Branch_0/MaxPool_0a_3x3/MaxPool");

         SubStream i_b(graph);
-        i_b << ConvolutionLayer(3U, 3U, 96U,
-                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_3x3_weights.npy", weights_layout),
-                                std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0))
-            .set_name("Mixed_3a/Branch_1/Conv2d_0a_3x3/Conv2D")
-            << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_3x3_BatchNorm_moving_mean.npy"),
-                                       get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_3x3_BatchNorm_moving_variance.npy"),
-                                       get_random_accessor(1.f, 1.f),
-                                       get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_3x3_BatchNorm_beta.npy"),
-                                       0.001f)
-            .set_name("Mixed_3a/Branch_1/Conv2d_0a_3x3/BatchNorm")
-            << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_3a/Branch_1/Conv2d_0a_3x3/Relu");
+        i_b << ConvolutionLayer(
+                   3U, 3U, 96U,
+                   get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_3x3_weights.npy", weights_layout),
+                   std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0))
+                   .set_name("Mixed_3a/Branch_1/Conv2d_0a_3x3/Conv2D")
+            << BatchNormalizationLayer(
+                   get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_3x3_BatchNorm_moving_mean.npy"),
+                   get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_3x3_BatchNorm_moving_variance.npy"),
+                   get_random_accessor(1.f, 1.f),
+                   get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_3x3_BatchNorm_beta.npy"), 0.001f)
+                   .set_name("Mixed_3a/Branch_1/Conv2d_0a_3x3/BatchNorm")
+            << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+                   .set_name("Mixed_3a/Branch_1/Conv2d_0a_3x3/Relu");

         return ConcatLayer(std::move(i_a), std::move(i_b));
     }
@@ -222,74 +245,86 @@ private:
         std::string total_path = "/cnn_data/inceptionv4_model/Mixed_4a_";

         SubStream i_a(graph);
-        i_a << ConvolutionLayer(1U, 1U, 64U,
-                                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout),
-                                std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
-            .set_name("Mixed_4a/Branch_0/Conv2d_0a_1x1/Conv2D")
-            << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
-                                       get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
-                                       get_random_accessor(1.f, 1.f),
-
get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name("Mixed_4a/Branch_0/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_4a/Branch_0/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(3U, 3U, 96U, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_4a/Branch_0/Conv2d_1a_3x3/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name("Mixed_4a/Branch_0/Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_4a/Branch_0/Conv2d_1a_3x3/Relu"); + i_a << ConvolutionLayer( + 1U, 1U, 64U, + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Mixed_4a/Branch_0/Conv2d_0a_1x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), 0.001f) + .set_name("Mixed_4a/Branch_0/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_4a/Branch_0/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 3U, 3U, 96U, + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Mixed_4a/Branch_0/Conv2d_1a_3x3/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), 0.001f) + .set_name("Mixed_4a/Branch_0/Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_4a/Branch_0/Conv2d_1a_3x3/Relu"); SubStream i_b(graph); - i_b << ConvolutionLayer(1U, 1U, 64U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_4a/Branch_1/Conv2d_0a_1x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - 
.set_name("Mixed_4a/Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_4a/Branch_1/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(7U, 1U, 64U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0)) - .set_name("Mixed_4a/Branch_1/Conv2d_0b_1x7/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_beta.npy"), - 0.001f) - .set_name("Mixed_4a/Branch_1/Conv2d_0b_1x7/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_4a/Branch_1/Conv2d_0b_1x7/Relu") - << ConvolutionLayer(1U, 7U, 64U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3)) - .set_name("Mixed_4a/Branch_1/Conv2d_0c_7x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_beta.npy"), - 0.001f) - .set_name("Mixed_4a/Branch_1/Conv2d_0c_7x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_4a/Branch_1/Conv2d_0c_7x1/Relu") - << ConvolutionLayer(3U, 3U, 96U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_4a/Branch_1/Conv2d_1a_3x3/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name("Mixed_4a/Branch_1/Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_4a/Branch_1/Conv2d_1a_3x3/Relu"); + i_b << ConvolutionLayer( + 1U, 1U, 64U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Mixed_4a/Branch_1/Conv2d_0a_1x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), 0.001f) + .set_name("Mixed_4a/Branch_1/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + 
.set_name("Mixed_4a/Branch_1/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 7U, 1U, 64U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0)) + .set_name("Mixed_4a/Branch_1/Conv2d_0b_1x7/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_beta.npy"), 0.001f) + .set_name("Mixed_4a/Branch_1/Conv2d_0b_1x7/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_4a/Branch_1/Conv2d_0b_1x7/Relu") + << ConvolutionLayer( + 1U, 7U, 64U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3)) + .set_name("Mixed_4a/Branch_1/Conv2d_0c_7x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_beta.npy"), 0.001f) + .set_name("Mixed_4a/Branch_1/Conv2d_0c_7x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_4a/Branch_1/Conv2d_0c_7x1/Relu") + << ConvolutionLayer( + 3U, 3U, 96U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Mixed_4a/Branch_1/Conv2d_1a_3x3/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"), 0.001f) + .set_name("Mixed_4a/Branch_1/Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_4a/Branch_1/Conv2d_1a_3x3/Relu"); return ConcatLayer(std::move(i_a), std::move(i_b)); } @@ -299,22 +334,24 @@ private: std::string total_path = "/cnn_data/inceptionv4_model/Mixed_5a_"; SubStream i_a(graph); - i_a << ConvolutionLayer(3U, 3U, 192U, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) - .set_name("Mixed_5a/Branch_0/Conv2d_1a_3x3/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name("Mixed_5a/Branch_0/Conv2d_1a_3x3/BatchNorm") - << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_5a/Branch_0/Conv2d_1a_3x3/Relu"); + i_a << ConvolutionLayer( + 3U, 3U, 192U, + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Mixed_5a/Branch_0/Conv2d_1a_3x3/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), 0.001f) + .set_name("Mixed_5a/Branch_0/Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_5a/Branch_0/Conv2d_1a_3x3/Relu"); SubStream i_b(graph); - i_b << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), - true)) - .set_name("Mixed_5a/Branch_1/MaxPool_1a_3x3/MaxPool"); + i_b << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true)) + .set_name("Mixed_5a/Branch_1/MaxPool_1a_3x3/MaxPool"); return ConcatLayer(std::move(i_a), std::move(i_b)); } @@ -324,92 +361,106 @@ private: std::string total_path = "/cnn_data/inceptionv4_model/" + param_path + "_"; SubStream i_a(graph); - i_a << ConvolutionLayer(1U, 1U, 96U, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu"); + i_a << ConvolutionLayer( + 1U, 1U, 96U, + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu"); SubStream i_b(graph); - i_b << ConvolutionLayer(1U, 1U, 64U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), - 
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(3U, 3U, 96U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) - .set_name(param_path + "/Branch_1/Conv2d_0b_3x3/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0b_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0b_3x3/Relu"); + i_b << ConvolutionLayer( + 1U, 1U, 64U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 3U, 3U, 96U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name(param_path + "/Branch_1/Conv2d_0b_3x3/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0b_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_0b_3x3/Relu"); SubStream i_c(graph); - i_c << ConvolutionLayer(1U, 1U, 64U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), 
PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(3U, 3U, 96U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) - .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0b_3x3/Relu") - << ConvolutionLayer(3U, 3U, 96U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) - .set_name(param_path + "/Branch_2/Conv2d_0c_3x3/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0c_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0c_3x3/Relu"); + i_c << ConvolutionLayer( + 1U, 1U, 64U, + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 3U, 3U, 96U, + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/Conv2D") + << 
BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/Relu") + << ConvolutionLayer( + 3U, 3U, 96U, + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name(param_path + "/Branch_2/Conv2d_0c_3x3/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0c_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0c_3x3/Relu"); SubStream i_d(graph); - i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, common_params.data_layout, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), - true)) - .set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool") - << ConvolutionLayer(1U, 1U, 96U, - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Relu"); + i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, common_params.data_layout, + PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)) + .set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool") + << ConvolutionLayer( + 1U, 1U, 96U, + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/BatchNorm") + << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Relu"); return ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)); } @@ -419,57 +470,65 @@ private: std::string total_path = "/cnn_data/inceptionv4_model/Mixed_6a_"; SubStream i_a(graph); - i_a << ConvolutionLayer(3U, 3U, 384U, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) - .set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/Relu"); + i_a << ConvolutionLayer( + 3U, 3U, 384U, + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), 0.001f) + .set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/Relu"); SubStream i_b(graph); - i_b << ConvolutionLayer(1U, 1U, 192U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(3U, 3U, 224U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) - .set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + 
"Branch_1_Conv2d_0b_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/Relu") - << ConvolutionLayer(3U, 3U, 256U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) - .set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/Relu"); + i_b << ConvolutionLayer( + 1U, 1U, 192U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), 0.001f) + .set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 3U, 3U, 224U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_beta.npy"), 0.001f) + .set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/Relu") + << ConvolutionLayer( + 3U, 3U, 256U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"), 0.001f) + .set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/BatchNorm") + << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/Relu"); SubStream i_c(graph); - i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), - true)) - .set_name("Mixed_6a/Branch_2/MaxPool_1a_3x3/MaxPool"); + i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true)) + .set_name("Mixed_6a/Branch_2/MaxPool_1a_3x3/MaxPool"); return ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c)); } @@ -479,125 +538,145 @@ private: std::string total_path = "/cnn_data/inceptionv4_model/" + param_path + "_"; SubStream i_a(graph); - i_a << ConvolutionLayer(1U, 1U, 384U, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu"); + i_a << ConvolutionLayer( + 1U, 1U, 384U, + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu"); SubStream i_b(graph); - i_b << ConvolutionLayer(1U, 1U, 192U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(7U, 1U, 224U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout), - 
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0)) - .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0b_1x7/Relu") - << ConvolutionLayer(1U, 7U, 256U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3)) - .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0c_7x1/Relu"); + i_b << ConvolutionLayer( + 1U, 1U, 192U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 7U, 1U, 224U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0)) + .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/Relu") + << ConvolutionLayer( + 1U, 7U, 256U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 
3)) + .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/Relu"); SubStream i_c(graph); - i_c << ConvolutionLayer(1U, 1U, 192U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(1U, 7U, 192U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3)) - .set_name(param_path + "/Branch_2/Conv2d_0b_7x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0b_7x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0b_7x1/Relu") - << ConvolutionLayer(7U, 1U, 224U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0)) - .set_name(param_path + "/Branch_2/Conv2d_0c_1x7/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0c_1x7/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0c_1x7/Relu") - << ConvolutionLayer(1U, 7U, 224U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3)) - .set_name(param_path + "/Branch_2/Conv2d_0d_7x1/Conv2D") - << 
BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0d_7x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0d_7x1/Relu") - << ConvolutionLayer(7U, 1U, 256U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0)) - .set_name(param_path + "/Branch_2/Conv2d_0e_1x7/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0e_1x7/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0e_1x7/Relu"); + i_c << ConvolutionLayer( + 1U, 1U, 192U, + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 1U, 7U, 192U, + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3)) + .set_name(param_path + "/Branch_2/Conv2d_0b_7x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0b_7x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0b_7x1/Relu") + << ConvolutionLayer( + 7U, 1U, 224U, + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0)) + .set_name(param_path + "/Branch_2/Conv2d_0c_1x7/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + 
"Branch_2_Conv2d_0c_1x7_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0c_1x7/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0c_1x7/Relu") + << ConvolutionLayer( + 1U, 7U, 224U, + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3)) + .set_name(param_path + "/Branch_2/Conv2d_0d_7x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0d_7x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0d_7x1/Relu") + << ConvolutionLayer( + 7U, 1U, 256U, + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0)) + .set_name(param_path + "/Branch_2/Conv2d_0e_1x7/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0e_1x7/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0e_1x7/Relu"); SubStream i_d(graph); - i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, common_params.data_layout, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), - true)) - .set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool") - << ConvolutionLayer(1U, 1U, 128U, - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Relu"); + i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, common_params.data_layout, + PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)) + .set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool") + << 
ConvolutionLayer( + 1U, 1U, 128U, + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Relu"); return ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)); } @@ -607,79 +686,91 @@ private: std::string total_path = "/cnn_data/inceptionv4_model/Mixed_7a_"; SubStream i_a(graph); - i_a << ConvolutionLayer(1U, 1U, 192U, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(3U, 3U, 192U, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) - .set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/Relu"); + i_a << ConvolutionLayer( + 1U, 1U, 192U, + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), 0.001f) + .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/BatchNorm") + << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 3U, 3U, 192U, + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), 0.001f) + .set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/Relu"); SubStream i_b(graph); - i_b << ConvolutionLayer(1U, 1U, 256U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(7U, 1U, 256U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0)) - .set_name("Mixed_7a/Branch_1/Conv2d_0b_1x7/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_beta.npy"), - 0.001f) - .set_name("Mixed_7a/Branch_1/Conv2d_0b_1x7/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_1/Conv2d_0b_1x7/Relu") - << ConvolutionLayer(1U, 7U, 320U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3)) - .set_name("Mixed_7a/Branch_1/Conv2d_0c_7x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_beta.npy"), - 0.001f) - .set_name("Mixed_7a/Branch_1/Conv2d_0c_7x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_1/Conv2d_0c_7x1/Relu") 
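Every hunk in this Inception-v4 section is the same mechanical re-wrap of one idiom: each branch of a block is a SubStream forked from the main Stream, filled with a ConvolutionLayer/BatchNormalizationLayer/ActivationLayer chain, and the branches are merged back with a ConcatLayer. One detail worth noting while reading: get_random_accessor(1.f, 1.f) in the gamma slot of every BatchNormalizationLayer degenerates to a constant fill of 1.0 (lower and upper bound coincide), presumably because the pretrained checkpoints ship no scale tensor. Below is a minimal sketch of the branch-and-concat idiom, not part of the patch: the two-branch layout, shapes and names are illustrative, and random accessors stand in for the .npy weight files.

// Minimal, self-contained sketch of the SubStream/ConcatLayer idiom used by
// the Inception-v4 blocks in this file. The two-branch layout, shapes and
// names are illustrative; random accessors stand in for the .npy weights.
#include "arm_compute/graph.h"
#include "utils/GraphUtils.h"

#include <memory>

using namespace arm_compute;
using namespace arm_compute::graph::frontend;
using namespace arm_compute::graph_utils;

int main()
{
    Stream graph(0, "TwoBranchBlock");

    // 56x56x64 float input in NCHW, batch of one.
    TensorDescriptor input_descriptor(TensorShape(56U, 56U, 64U, 1U), DataType::F32);

    graph << Target::NEON << InputLayer(input_descriptor, get_random_accessor(0.f, 1.f));

    // Each SubStream forks from the current tail of the main stream.
    SubStream i_a(graph);
    i_a << ConvolutionLayer(1U, 1U, 32U, get_random_accessor(0.f, 1.f),
                            std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                            PadStrideInfo(1, 1, 0, 0))
               .set_name("Branch_0/Conv2d_1x1")
        << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
               .set_name("Branch_0/Relu");

    SubStream i_b(graph);
    i_b << ConvolutionLayer(3U, 3U, 32U, get_random_accessor(0.f, 1.f),
                            std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                            PadStrideInfo(1, 1, 1, 1))
               .set_name("Branch_1/Conv2d_3x3")
        << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
               .set_name("Branch_1/Relu");

    // Join the branches along the channel dimension and terminate the graph.
    graph << ConcatLayer(std::move(i_a), std::move(i_b)).set_name("concat")
          << OutputLayer(std::make_unique<DummyAccessor>());

    GraphConfig config;
    graph.finalize(Target::NEON, config);
    graph.run();
    return 0;
}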
- << ConvolutionLayer(3U, 3U, 320U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) - .set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/Relu"); + i_b << ConvolutionLayer( + 1U, 1U, 256U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), 0.001f) + .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 7U, 1U, 256U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0)) + .set_name("Mixed_7a/Branch_1/Conv2d_0b_1x7/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_beta.npy"), 0.001f) + .set_name("Mixed_7a/Branch_1/Conv2d_0b_1x7/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_1/Conv2d_0b_1x7/Relu") + << ConvolutionLayer( + 1U, 7U, 320U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3)) + .set_name("Mixed_7a/Branch_1/Conv2d_0c_7x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_beta.npy"), 0.001f) + .set_name("Mixed_7a/Branch_1/Conv2d_0c_7x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_1/Conv2d_0c_7x1/Relu") + << ConvolutionLayer( + 3U, 3U, 320U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), + 
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"), 0.001f) + .set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/Relu"); SubStream i_c(graph); - i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), - true)) - .set_name("Mixed_7a/Branch_2/MaxPool_1a_3x3/MaxPool"); + i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true)) + .set_name("Mixed_7a/Branch_2/MaxPool_1a_3x3/MaxPool"); return ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c)); } @@ -689,163 +780,163 @@ private: std::string total_path = "/cnn_data/inceptionv4_model/" + param_path + "_"; SubStream i_a(graph); - i_a << ConvolutionLayer(1U, 1U, 256U, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu"); + i_a << ConvolutionLayer( + 1U, 1U, 256U, + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu"); SubStream i_b(graph); i_b << ConvolutionLayer( - 1U, 1U, 384U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Conv2D") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - 
get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu"); + 1U, 1U, 384U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu"); SubStream i_b1(i_b); i_b1 << ConvolutionLayer( - 3U, 1U, 256U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 0)) - .set_name(param_path + "/Branch_1/Conv2d_0b_1x3/Conv2D") + 3U, 1U, 256U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 0)) + .set_name(param_path + "/Branch_1/Conv2d_0b_1x3/Conv2D") << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0b_1x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0b_1x3/Relu"); + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0b_1x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_0b_1x3/Relu"); SubStream i_b2(i_b); i_b2 << ConvolutionLayer( - 1U, 3U, 256U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 1)) - .set_name(param_path + "/Branch_1/Conv2d_0c_3x1/Conv2D") + 1U, 3U, 256U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 1)) + .set_name(param_path + 
"/Branch_1/Conv2d_0c_3x1/Conv2D") << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0c_3x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0c_3x1/Relu"); + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + total_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0c_3x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_0c_3x1/Relu"); // Merge b1 and b2 i_b << ConcatLayer(std::move(i_b1), std::move(i_b2)).set_name(param_path + "/Branch_1/concat"); SubStream i_c(graph); i_c << ConvolutionLayer( - 1U, 1U, 384U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Conv2D") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu") + 1U, 1U, 384U, + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu") << ConvolutionLayer( - 1U, 3U, 448U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 1)) - .set_name(param_path + "/Branch_2/Conv2d_0b_3x1/Conv2D") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x1_BatchNorm_moving_variance.npy"), - 
get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0b_3x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0b_3x1/Relu") + 1U, 3U, 448U, + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 1)) + .set_name(param_path + "/Branch_2/Conv2d_0b_3x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0b_3x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0b_3x1/Relu") << ConvolutionLayer( - 3U, 1U, 512U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 0)) - .set_name(param_path + "/Branch_2/Conv2d_0c_1x3/Conv2D") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0c_1x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0c_1x3/Relu"); + 3U, 1U, 512U, + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 0)) + .set_name(param_path + "/Branch_2/Conv2d_0c_1x3/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0c_1x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0c_1x3/Relu"); SubStream i_c1(i_c); i_c1 << ConvolutionLayer( - 3U, 1U, 256U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_1x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 0)) - .set_name(param_path + "/Branch_2/Conv2d_0d_1x3/Conv2D") + 3U, 1U, 256U, + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_1x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 0)) + .set_name(param_path + "/Branch_2/Conv2d_0d_1x3/Conv2D") << BatchNormalizationLayer( - get_weights_accessor(data_path, 
total_path + "Branch_2_Conv2d_0d_1x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_1x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_1x3_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0d_1x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0d_1x3/Relu"); + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_1x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + total_path + "Branch_2_Conv2d_0d_1x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_1x3_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0d_1x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0d_1x3/Relu"); SubStream i_c2(i_c); i_c2 << ConvolutionLayer( - 1U, 3U, 256U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 1)) - .set_name(param_path + "/Branch_2/Conv2d_0e_3x1/Conv2D") + 1U, 3U, 256U, + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 1)) + .set_name(param_path + "/Branch_2/Conv2d_0e_3x1/Conv2D") << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0e_3x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0e_3x1/Relu"); + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + total_path + "Branch_2_Conv2d_0e_3x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0e_3x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0e_3x1/Relu"); // Merge i_c1 and i_c2 i_c << ConcatLayer(std::move(i_c1), std::move(i_c2)).set_name(param_path + "/Branch_2/concat"); SubStream i_d(graph); - i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, common_params.data_layout, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), - true)) - .set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool") - << ConvolutionLayer(1U, 1U, 256U, - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"), - 
get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Relu"); + i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, common_params.data_layout, + PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)) + .set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool") + << ConvolutionLayer( + 1U, 1U, 256U, + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Relu"); return ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)); } diff --git a/examples/graph_lenet.cpp b/examples/graph_lenet.cpp index 7b475c2c03..7d6dce7b17 100644 --- a/examples/graph_lenet.cpp +++ b/examples/graph_lenet.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. 
*/ #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -35,8 +36,7 @@ using namespace arm_compute::graph_utils; class GraphLenetExample : public Example { public: - GraphLenetExample() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "LeNet") + GraphLenetExample() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "LeNet") { } bool do_setup(int argc, char **argv) override @@ -49,14 +49,15 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; } // Checks - ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "QASYMM8 not supported for this graph"); + ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), + "QASYMM8 not supported for this graph"); // Print parameter values std::cout << common_params << std::endl; @@ -67,43 +68,39 @@ public: // Create input descriptor const auto operation_layout = common_params.data_layout; - const TensorShape tensor_shape = permute_shape(TensorShape(28U, 28U, 1U, batches), DataLayout::NCHW, operation_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(28U, 28U, 1U, batches), DataLayout::NCHW, operation_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NCHW; //conv1 << pool1 << conv2 << pool2 << fc1 << act1 << fc2 << smx - graph << common_params.target - << common_params.fast_math_hint + graph << common_params.target << common_params.fast_math_hint << InputLayer(input_descriptor, get_input_accessor(common_params)) << ConvolutionLayer( - 5U, 5U, 20U, - get_weights_accessor(data_path, "/cnn_data/lenet_model/conv1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/lenet_model/conv1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv1") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))).set_name("pool1") + 5U, 5U, 20U, get_weights_accessor(data_path, "/cnn_data/lenet_model/conv1_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/lenet_model/conv1_b.npy"), PadStrideInfo(1, 1, 0, 0)) + .set_name("conv1") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))) + .set_name("pool1") << ConvolutionLayer( - 5U, 5U, 50U, - get_weights_accessor(data_path, "/cnn_data/lenet_model/conv2_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/lenet_model/conv2_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv2") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))).set_name("pool2") - << FullyConnectedLayer( - 500U, - get_weights_accessor(data_path, "/cnn_data/lenet_model/ip1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/lenet_model/ip1_b.npy")) - .set_name("ip1") + 5U, 5U, 50U, get_weights_accessor(data_path, "/cnn_data/lenet_model/conv2_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/lenet_model/conv2_b.npy"), PadStrideInfo(1, 1, 0, 0)) + .set_name("conv2") + << 
PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))) + .set_name("pool2") + << FullyConnectedLayer(500U, + get_weights_accessor(data_path, "/cnn_data/lenet_model/ip1_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/lenet_model/ip1_b.npy")) + .set_name("ip1") << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu") - << FullyConnectedLayer( - 10U, - get_weights_accessor(data_path, "/cnn_data/lenet_model/ip2_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/lenet_model/ip2_b.npy")) - .set_name("ip2") - << SoftmaxLayer().set_name("prob") - << OutputLayer(get_output_accessor(common_params)); + << FullyConnectedLayer(10U, + get_weights_accessor(data_path, "/cnn_data/lenet_model/ip2_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/lenet_model/ip2_b.npy")) + .set_name("ip2") + << SoftmaxLayer().set_name("prob") << OutputLayer(get_output_accessor(common_params)); // Finalize graph GraphConfig config; @@ -111,6 +108,7 @@ public: config.use_tuner = common_params.enable_tuner; config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; graph.finalize(common_params.target, config); @@ -131,6 +129,14 @@ private: /** Main program for LeNet * + * Model is based on: + * http://yann.lecun.com/exdb/publis/pdf/lecun-98.pdf + * "Gradient-Based Learning Applied to Document Recognition" + * Yann LeCun, Léon Bottou, Yoshua Bengio, and Patrick Haffner + * + * The original model uses tanh instead of ReLU activations. However, the use of ReLU activations in LeNet has been + * widely adopted to improve accuracy. + * * @note To list all the possible arguments execute the binary appended with the --help option * * @param[in] argc Number of arguments diff --git a/examples/graph_mnist.cpp b/examples/graph_mnist.cpp deleted file mode 100644 index 56d5c96282..0000000000 --- a/examples/graph_mnist.cpp +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE.
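Aside from re-wrapping, the one functional change in the LeNet hunk above is the new config.mlgo_file line: GraphConfig gains an MLGO heuristics file, forwarded from the common graph options before finalize(). A reduced sketch of that finalization step, with placeholder values rather than anything taken from the patch:

// Reduced sketch of the graph finalization step shared by these examples.
// File names below are placeholders; the real examples forward the values
// held in CommonGraphParams instead.
#include "arm_compute/graph.h"

using namespace arm_compute::graph;

void finalize_stream(frontend::Stream &stream)
{
    GraphConfig config;
    config.num_threads = 4;                 // CPU worker threads
    config.use_tuner   = false;             // OpenCL tuner toggle
    config.tuner_file  = "acl_tuner.csv";   // tuner output file (placeholder)
    config.mlgo_file   = "heuristics.mlgo"; // MLGO heuristics file, the field added by this patch (placeholder)

    stream.finalize(Target::NEON, config);
}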
- */ - -#include "arm_compute/graph.h" -#include "support/ToolchainSupport.h" -#include "utils/CommonGraphOptions.h" -#include "utils/GraphUtils.h" -#include "utils/Utils.h" - -using namespace arm_compute; -using namespace arm_compute::utils; -using namespace arm_compute::graph::frontend; -using namespace arm_compute::graph_utils; - -/** Example demonstrating how to implement Mnist's network using the Compute Library's graph API */ -class GraphMnistExample : public Example -{ -public: - GraphMnistExample() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "LeNet") - { - } - bool do_setup(int argc, char **argv) override - { - // Parse arguments - cmd_parser.parse(argc, argv); - cmd_parser.validate(); - - // Consume common parameters - common_params = consume_common_graph_parameters(common_opts); - - // Return when help menu is requested - if(common_params.help) - { - cmd_parser.print_help(argv[0]); - return false; - } - - // Print parameter values - std::cout << common_params << std::endl; - - // Get trainable parameters data path - std::string data_path = common_params.data_path; - - // Add model path to data path - if(!data_path.empty() && arm_compute::is_data_type_quantized_asymmetric(common_params.data_type)) - { - data_path += "/cnn_data/mnist_qasymm8_model/"; - } - - // Create input descriptor - const auto operation_layout = common_params.data_layout; - const TensorShape tensor_shape = permute_shape(TensorShape(28U, 28U, 1U), DataLayout::NCHW, operation_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); - - const QuantizationInfo in_quant_info = QuantizationInfo(0.003921568859368563f, 0); - - const std::vector<std::pair<QuantizationInfo, QuantizationInfo>> conv_quant_info = - { - { QuantizationInfo(0.004083447158336639f, 138), QuantizationInfo(0.0046257381327450275f, 0) }, // conv0 - { QuantizationInfo(0.0048590428195893764f, 149), QuantizationInfo(0.03558270260691643f, 0) }, // conv1 - { QuantizationInfo(0.004008443560451269f, 146), QuantizationInfo(0.09117382764816284f, 0) }, // conv2 - { QuantizationInfo(0.004344311077147722f, 160), QuantizationInfo(0.5494495034217834f, 167) }, // fc - }; - - // Set weights trained layout - const DataLayout weights_layout = DataLayout::NHWC; - FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo(); - fc_info.set_weights_trained_layout(weights_layout); - - graph << common_params.target - << common_params.fast_math_hint - << InputLayer(input_descriptor.set_quantization_info(in_quant_info), - get_input_accessor(common_params)) - << ConvolutionLayer( - 3U, 3U, 32U, - get_weights_accessor(data_path, "conv2d_weights_quant_FakeQuantWithMinMaxVars.npy", weights_layout), - get_weights_accessor(data_path, "conv2d_Conv2D_bias.npy"), - PadStrideInfo(1U, 1U, 1U, 1U), 1, conv_quant_info.at(0).first, conv_quant_info.at(0).second) - .set_name("Conv0") - - << ConvolutionLayer( - 3U, 3U, 32U, - get_weights_accessor(data_path, "conv2d_1_weights_quant_FakeQuantWithMinMaxVars.npy", weights_layout), - get_weights_accessor(data_path, "conv2d_1_Conv2D_bias.npy"), - PadStrideInfo(1U, 1U, 1U, 1U), 1, conv_quant_info.at(1).first, conv_quant_info.at(1).second) - .set_name("conv1") - - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))).set_name("maxpool1") - - << ConvolutionLayer( - 3U, 3U, 32U, - get_weights_accessor(data_path, "conv2d_2_weights_quant_FakeQuantWithMinMaxVars.npy", weights_layout), - 
get_weights_accessor(data_path, "conv2d_2_Conv2D_bias.npy"), - PadStrideInfo(1U, 1U, 1U, 1U), 1, conv_quant_info.at(2).first, conv_quant_info.at(2).second) - .set_name("conv2") - - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))).set_name("maxpool2") - - << FullyConnectedLayer( - 10U, - get_weights_accessor(data_path, "dense_weights_quant_FakeQuantWithMinMaxVars_transpose.npy", weights_layout), - get_weights_accessor(data_path, "dense_MatMul_bias.npy"), - fc_info, conv_quant_info.at(3).first, conv_quant_info.at(3).second) - .set_name("fc") - - << SoftmaxLayer().set_name("prob"); - - if(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type)) - { - graph << DequantizationLayer().set_name("dequantize"); - } - - graph << OutputLayer(get_output_accessor(common_params, 5)); - - // Finalize graph - GraphConfig config; - config.num_threads = common_params.threads; - config.use_tuner = common_params.enable_tuner; - config.tuner_mode = common_params.tuner_mode; - config.tuner_file = common_params.tuner_file; - - graph.finalize(common_params.target, config); - - return true; - } - void do_run() override - { - // Run graph - graph.run(); - } - -private: - CommandLineParser cmd_parser; - CommonGraphOptions common_opts; - CommonGraphParams common_params; - Stream graph; -}; - -/** Main program for Mnist Example - * - * @note To list all the possible arguments execute the binary appended with the --help option - * - * @param[in] argc Number of arguments - * @param[in] argv Arguments - */ -int main(int argc, char **argv) -{ - return arm_compute::utils::run_example<GraphMnistExample>(argc, argv); -} diff --git a/examples/graph_mobilenet.cpp b/examples/graph_mobilenet.cpp index bb893998be..e3a6ef116d 100644 --- a/examples/graph_mobilenet.cpp +++ b/examples/graph_mobilenet.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. */ #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -36,14 +37,13 @@ using namespace arm_compute::graph_utils; class GraphMobilenetExample : public Example { public: - GraphMobilenetExample() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "MobileNetV1") + GraphMobilenetExample() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "MobileNetV1") { // Add model id option model_id_opt = cmd_parser.add_option<SimpleOption<int>>("model-id", 0); model_id_opt->set_help("Mobilenet model id (0: 1.0_224, else: 0.75_160"); } - GraphMobilenetExample(const GraphMobilenetExample &) = delete; + GraphMobilenetExample(const GraphMobilenetExample &) = delete; GraphMobilenetExample &operator=(const GraphMobilenetExample &) = delete; ~GraphMobilenetExample() override = default; bool do_setup(int argc, char **argv) override @@ -56,7 +56,7 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; @@ -72,15 +72,17 @@ public: unsigned int spatial_size = (model_id == 0 || common_params.data_type == DataType::QASYMM8) ? 
224 : 160; // Create input descriptor - const TensorShape tensor_shape = permute_shape(TensorShape(spatial_size, spatial_size, 3U, 1U), DataLayout::NCHW, common_params.data_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(spatial_size, spatial_size, 3U, common_params.batches), DataLayout::NCHW, + common_params.data_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); // Set graph hints - graph << common_params.target - << common_params.fast_math_hint; + graph << common_params.target << common_params.fast_math_hint; // Create core graph - if(arm_compute::is_data_type_float(common_params.data_type)) + if (arm_compute::is_data_type_float(common_params.data_type)) { create_graph_float(input_descriptor, model_id); } @@ -90,8 +92,7 @@ public: } // Create common tail - graph << ReshapeLayer(TensorShape(1001U)).set_name("Reshape") - << SoftmaxLayer().set_name("Softmax") + graph << ReshapeLayer(TensorShape(1001U)).set_name("Reshape") << SoftmaxLayer().set_name("Softmax") << OutputLayer(get_output_accessor(common_params, 5)); // Finalize graph @@ -100,6 +101,7 @@ public: config.use_tuner = common_params.enable_tuner; config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; graph.finalize(common_params.target, config); @@ -114,63 +116,85 @@ public: private: CommandLineParser cmd_parser; CommonGraphOptions common_opts; - SimpleOption<int> *model_id_opt{ nullptr }; + SimpleOption<int> *model_id_opt{nullptr}; CommonGraphParams common_params; Stream graph; void create_graph_float(TensorDescriptor &input_descriptor, int model_id) { float depth_scale = (model_id == 0) ? 1.f : 0.75; - std::string model_path = (model_id == 0) ? "/cnn_data/mobilenet_v1_1_224_model/" : "/cnn_data/mobilenet_v1_075_160_model/"; + std::string model_path = + (model_id == 0) ? 
"/cnn_data/mobilenet_v1_1_224_model/" : "/cnn_data/mobilenet_v1_075_160_model/"; // Create a preprocessor object - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<TFPreproccessor>(); + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<TFPreproccessor>(); // Get trainable parameters data path std::string data_path = common_params.data_path; // Add model path to data path - if(!data_path.empty()) + if (!data_path.empty()) { data_path += model_path; } - graph << InputLayer(input_descriptor, - get_input_accessor(common_params, std::move(preprocessor), false)) - << ConvolutionLayer( - 3U, 3U, 32U * depth_scale, - get_weights_accessor(data_path, "Conv2d_0_weights.npy", DataLayout::NCHW), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)) - .set_name("Conv2d_0") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "Conv2d_0_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Conv2d_0_BatchNorm_moving_variance.npy"), - get_weights_accessor(data_path, "Conv2d_0_BatchNorm_gamma.npy"), - get_weights_accessor(data_path, "Conv2d_0_BatchNorm_beta.npy"), - 0.001f) - .set_name("Conv2d_0/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)).set_name("Conv2d_0/Relu6"); - graph << get_dwsc_node_float(data_path, "Conv2d_1", 64 * depth_scale, PadStrideInfo(1, 1, 1, 1), PadStrideInfo(1, 1, 0, 0)); - graph << get_dwsc_node_float(data_path, "Conv2d_2", 128 * depth_scale, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); - graph << get_dwsc_node_float(data_path, "Conv2d_3", 128 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); - graph << get_dwsc_node_float(data_path, "Conv2d_4", 256 * depth_scale, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); - graph << get_dwsc_node_float(data_path, "Conv2d_5", 256 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); - graph << get_dwsc_node_float(data_path, "Conv2d_6", 512 * depth_scale, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); - graph << get_dwsc_node_float(data_path, "Conv2d_7", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); - graph << get_dwsc_node_float(data_path, "Conv2d_8", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); - graph << get_dwsc_node_float(data_path, "Conv2d_9", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); - graph << get_dwsc_node_float(data_path, "Conv2d_10", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); - graph << get_dwsc_node_float(data_path, "Conv2d_11", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); - graph << get_dwsc_node_float(data_path, "Conv2d_12", 1024 * depth_scale, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); - graph << get_dwsc_node_float(data_path, "Conv2d_13", 1024 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); - graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 
common_params.data_layout)).set_name("Logits/AvgPool_1a") - << ConvolutionLayer( - 1U, 1U, 1001U, - get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_weights.npy", DataLayout::NCHW), - get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_biases.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Logits/Conv2d_1c_1x1"); + graph << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), false)) + << ConvolutionLayer(3U, 3U, 32U * depth_scale, + get_weights_accessor(data_path, "Conv2d_0_weights.npy", DataLayout::NCHW), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)) + .set_name("Conv2d_0") + << BatchNormalizationLayer(get_weights_accessor(data_path, "Conv2d_0_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Conv2d_0_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, "Conv2d_0_BatchNorm_gamma.npy"), + get_weights_accessor(data_path, "Conv2d_0_BatchNorm_beta.npy"), 0.001f) + .set_name("Conv2d_0/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)) + .set_name("Conv2d_0/Relu6"); + graph << get_dwsc_node_float(data_path, "Conv2d_1", 64 * depth_scale, PadStrideInfo(1, 1, 1, 1), + PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node_float(data_path, "Conv2d_2", 128 * depth_scale, + PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), + PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node_float(data_path, "Conv2d_3", 128 * depth_scale, + PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), + PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node_float(data_path, "Conv2d_4", 256 * depth_scale, + PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), + PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node_float(data_path, "Conv2d_5", 256 * depth_scale, + PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), + PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node_float(data_path, "Conv2d_6", 512 * depth_scale, + PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), + PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node_float(data_path, "Conv2d_7", 512 * depth_scale, + PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), + PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node_float(data_path, "Conv2d_8", 512 * depth_scale, + PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), + PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node_float(data_path, "Conv2d_9", 512 * depth_scale, + PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), + PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node_float(data_path, "Conv2d_10", 512 * depth_scale, + PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), + PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node_float(data_path, "Conv2d_11", 512 * depth_scale, + PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), + PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node_float(data_path, "Conv2d_12", 1024 * depth_scale, + PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), + PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node_float(data_path, "Conv2d_13", 1024 * depth_scale, + PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), + PadStrideInfo(1, 1, 0, 0)); + graph + << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, common_params.data_layout)).set_name("Logits/AvgPool_1a") + << ConvolutionLayer( + 1U, 1U, 1001U, get_weights_accessor(data_path, 
"Logits_Conv2d_1c_1x1_weights.npy", DataLayout::NCHW), + get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_biases.npy"), PadStrideInfo(1, 1, 0, 0)) + .set_name("Logits/Conv2d_1c_1x1"); } void create_graph_qasymm(TensorDescriptor &input_descriptor) @@ -179,7 +203,7 @@ private: std::string data_path = common_params.data_path; // Add model path to data path - if(!data_path.empty()) + if (!data_path.empty()) { data_path += "/cnn_data/mobilenet_qasymm8_model/"; } @@ -187,19 +211,16 @@ private: // Quantization info taken from the AndroidNN QASYMM8 MobileNet example const QuantizationInfo in_quant_info = QuantizationInfo(0.0078125f, 128); - const std::vector<QuantizationInfo> conv_weights_quant_info = - { + const std::vector<QuantizationInfo> conv_weights_quant_info = { QuantizationInfo(0.02182667888700962f, 151), // conv0 QuantizationInfo(0.004986600950360298f, 74) // conv14 }; - const std::vector<QuantizationInfo> conv_out_quant_info = - { + const std::vector<QuantizationInfo> conv_out_quant_info = { QuantizationInfo(0.023528477177023888f, 0), // conv0 QuantizationInfo(0.16609922051429749f, 66) // conv14 }; - const std::vector<QuantizationInfo> depth_weights_quant_info = - { + const std::vector<QuantizationInfo> depth_weights_quant_info = { QuantizationInfo(0.29219913482666016f, 110), // dwsc1 QuantizationInfo(0.40277284383773804f, 130), // dwsc2 QuantizationInfo(0.06053730100393295f, 160), // dwsc3 @@ -215,8 +236,7 @@ private: QuantizationInfo(0.12616927921772003f, 211) // dwsc13 }; - const std::vector<QuantizationInfo> point_weights_quant_info = - { + const std::vector<QuantizationInfo> point_weights_quant_info = { QuantizationInfo(0.030420949682593346f, 121), // dwsc1 QuantizationInfo(0.015148180536925793f, 104), // dwsc2 QuantizationInfo(0.013755458407104015f, 94), // dwsc3 @@ -234,108 +254,121 @@ private: graph << InputLayer(input_descriptor.set_quantization_info(in_quant_info), get_input_accessor(common_params, nullptr, false)) - << ConvolutionLayer( - 3U, 3U, 32U, - get_weights_accessor(data_path, "Conv2d_0_weights.npy"), - get_weights_accessor(data_path, "Conv2d_0_bias.npy"), - PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), - 1, conv_weights_quant_info.at(0), conv_out_quant_info.at(0)) - .set_name("Conv2d_0") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)).set_name("Conv2d_0/Relu6"); - graph << get_dwsc_node_qasymm(data_path, "Conv2d_1", 64U, PadStrideInfo(1U, 1U, 1U, 1U), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(0), point_weights_quant_info.at(0)); - graph << get_dwsc_node_qasymm(data_path, "Conv2d_2", 128U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(1), - point_weights_quant_info.at(1)); - graph << get_dwsc_node_qasymm(data_path, "Conv2d_3", 128U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(2), - point_weights_quant_info.at(2)); - graph << get_dwsc_node_qasymm(data_path, "Conv2d_4", 256U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(3), - point_weights_quant_info.at(3)); - graph << get_dwsc_node_qasymm(data_path, "Conv2d_5", 256U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(4), - point_weights_quant_info.at(4)); - graph << get_dwsc_node_qasymm(data_path, 
"Conv2d_6", 512U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(5), - point_weights_quant_info.at(5)); - graph << get_dwsc_node_qasymm(data_path, "Conv2d_7", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(6), - point_weights_quant_info.at(6)); - graph << get_dwsc_node_qasymm(data_path, "Conv2d_8", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(7), - point_weights_quant_info.at(7)); - graph << get_dwsc_node_qasymm(data_path, "Conv2d_9", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(8), - point_weights_quant_info.at(8)); - graph << get_dwsc_node_qasymm(data_path, "Conv2d_10", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(9), - point_weights_quant_info.at(9)); - graph << get_dwsc_node_qasymm(data_path, "Conv2d_11", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(10), - point_weights_quant_info.at(10)); - graph << get_dwsc_node_qasymm(data_path, "Conv2d_12", 1024U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(11), - point_weights_quant_info.at(11)); - graph << get_dwsc_node_qasymm(data_path, "Conv2d_13", 1024U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(12), - point_weights_quant_info.at(12)) - << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, common_params.data_layout)).set_name("Logits/AvgPool_1a") - << ConvolutionLayer( - 1U, 1U, 1001U, - get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_weights.npy"), - get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_bias.npy"), - PadStrideInfo(1U, 1U, 0U, 0U), 1, conv_weights_quant_info.at(1), conv_out_quant_info.at(1)) - .set_name("Logits/Conv2d_1c_1x1"); + << ConvolutionLayer(3U, 3U, 32U, get_weights_accessor(data_path, "Conv2d_0_weights.npy"), + get_weights_accessor(data_path, "Conv2d_0_bias.npy"), + PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), 1, + conv_weights_quant_info.at(0), conv_out_quant_info.at(0)) + .set_name("Conv2d_0") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)) + .set_name("Conv2d_0/Relu6"); + graph << get_dwsc_node_qasymm(data_path, "Conv2d_1", 64U, PadStrideInfo(1U, 1U, 1U, 1U), + PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(0), + point_weights_quant_info.at(0)); + graph << get_dwsc_node_qasymm( + data_path, "Conv2d_2", 128U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), + PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(1), point_weights_quant_info.at(1)); + graph << get_dwsc_node_qasymm( + data_path, "Conv2d_3", 128U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), + PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(2), point_weights_quant_info.at(2)); + graph << get_dwsc_node_qasymm( + data_path, "Conv2d_4", 256U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), + PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(3), point_weights_quant_info.at(3)); + graph << 
get_dwsc_node_qasymm( + data_path, "Conv2d_5", 256U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), + PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(4), point_weights_quant_info.at(4)); + graph << get_dwsc_node_qasymm( + data_path, "Conv2d_6", 512U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), + PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(5), point_weights_quant_info.at(5)); + graph << get_dwsc_node_qasymm( + data_path, "Conv2d_7", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), + PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(6), point_weights_quant_info.at(6)); + graph << get_dwsc_node_qasymm( + data_path, "Conv2d_8", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), + PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(7), point_weights_quant_info.at(7)); + graph << get_dwsc_node_qasymm( + data_path, "Conv2d_9", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), + PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(8), point_weights_quant_info.at(8)); + graph << get_dwsc_node_qasymm( + data_path, "Conv2d_10", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), + PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(9), point_weights_quant_info.at(9)); + graph << get_dwsc_node_qasymm( + data_path, "Conv2d_11", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), + PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(10), point_weights_quant_info.at(10)); + graph << get_dwsc_node_qasymm( + data_path, "Conv2d_12", 1024U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), + PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(11), point_weights_quant_info.at(11)); + graph + << get_dwsc_node_qasymm( + data_path, "Conv2d_13", 1024U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), + PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(12), point_weights_quant_info.at(12)) + << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, common_params.data_layout)).set_name("Logits/AvgPool_1a") + << ConvolutionLayer(1U, 1U, 1001U, get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_weights.npy"), + get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_bias.npy"), + PadStrideInfo(1U, 1U, 0U, 0U), 1, conv_weights_quant_info.at(1), + conv_out_quant_info.at(1)) + .set_name("Logits/Conv2d_1c_1x1"); } - ConcatLayer get_dwsc_node_float(const std::string &data_path, std::string &&param_path, - unsigned int conv_filt, - PadStrideInfo dwc_pad_stride_info, PadStrideInfo conv_pad_stride_info) + ConcatLayer get_dwsc_node_float(const std::string &data_path, + std::string &&param_path, + unsigned int conv_filt, + PadStrideInfo dwc_pad_stride_info, + PadStrideInfo conv_pad_stride_info) { std::string total_path = param_path + "_"; SubStream sg(graph); sg << DepthwiseConvolutionLayer( - 3U, 3U, - get_weights_accessor(data_path, total_path + "depthwise_depthwise_weights.npy", DataLayout::NCHW), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - dwc_pad_stride_info) - .set_name(total_path + "depthwise/depthwise") + 3U, 3U, + get_weights_accessor(data_path, total_path + "depthwise_depthwise_weights.npy", DataLayout::NCHW), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), dwc_pad_stride_info) + .set_name(total_path + "depthwise/depthwise") << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + 
"depthwise_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_moving_variance.npy"), - get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_gamma.npy"), - get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_beta.npy"), - 0.001f) - .set_name(total_path + "depthwise/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)).set_name(total_path + "depthwise/Relu6") - << ConvolutionLayer( - 1U, 1U, conv_filt, - get_weights_accessor(data_path, total_path + "pointwise_weights.npy", DataLayout::NCHW), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - conv_pad_stride_info) - .set_name(total_path + "pointwise/Conv2D") + get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_gamma.npy"), + get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_beta.npy"), 0.001f) + .set_name(total_path + "depthwise/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)) + .set_name(total_path + "depthwise/Relu6") + << ConvolutionLayer(1U, 1U, conv_filt, + get_weights_accessor(data_path, total_path + "pointwise_weights.npy", DataLayout::NCHW), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), conv_pad_stride_info) + .set_name(total_path + "pointwise/Conv2D") << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "pointwise_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "pointwise_BatchNorm_moving_variance.npy"), - get_weights_accessor(data_path, total_path + "pointwise_BatchNorm_gamma.npy"), - get_weights_accessor(data_path, total_path + "pointwise_BatchNorm_beta.npy"), - 0.001f) - .set_name(total_path + "pointwise/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)).set_name(total_path + "pointwise/Relu6"); + get_weights_accessor(data_path, total_path + "pointwise_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "pointwise_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, total_path + "pointwise_BatchNorm_gamma.npy"), + get_weights_accessor(data_path, total_path + "pointwise_BatchNorm_beta.npy"), 0.001f) + .set_name(total_path + "pointwise/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)) + .set_name(total_path + "pointwise/Relu6"); return ConcatLayer(std::move(sg)); } - ConcatLayer get_dwsc_node_qasymm(const std::string &data_path, std::string &&param_path, + ConcatLayer get_dwsc_node_qasymm(const std::string &data_path, + std::string &&param_path, const unsigned int conv_filt, - PadStrideInfo dwc_pad_stride_info, PadStrideInfo conv_pad_stride_info, - QuantizationInfo depth_weights_quant_info, QuantizationInfo point_weights_quant_info) + PadStrideInfo dwc_pad_stride_info, + PadStrideInfo conv_pad_stride_info, + QuantizationInfo depth_weights_quant_info, + QuantizationInfo point_weights_quant_info) { std::string total_path = param_path + "_"; SubStream sg(graph); - sg << DepthwiseConvolutionLayer( - 3U, 3U, - get_weights_accessor(data_path, total_path + "depthwise_weights.npy"), - get_weights_accessor(data_path, total_path + "depthwise_bias.npy"), - dwc_pad_stride_info, 1, 
std::move(depth_weights_quant_info)) - .set_name(total_path + "depthwise/depthwise") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)).set_name(total_path + "depthwise/Relu6") - << ConvolutionLayer( - 1U, 1U, conv_filt, - get_weights_accessor(data_path, total_path + "pointwise_weights.npy"), - get_weights_accessor(data_path, total_path + "pointwise_bias.npy"), - conv_pad_stride_info, 1, std::move(point_weights_quant_info)) - .set_name(total_path + "pointwise/Conv2D") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)).set_name(total_path + "pointwise/Relu6"); + sg << DepthwiseConvolutionLayer(3U, 3U, get_weights_accessor(data_path, total_path + "depthwise_weights.npy"), + get_weights_accessor(data_path, total_path + "depthwise_bias.npy"), + dwc_pad_stride_info, 1, std::move(depth_weights_quant_info)) + .set_name(total_path + "depthwise/depthwise") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)) + .set_name(total_path + "depthwise/Relu6") + << ConvolutionLayer(1U, 1U, conv_filt, get_weights_accessor(data_path, total_path + "pointwise_weights.npy"), + get_weights_accessor(data_path, total_path + "pointwise_bias.npy"), conv_pad_stride_info, + 1, std::move(point_weights_quant_info)) + .set_name(total_path + "pointwise/Conv2D") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)) + .set_name(total_path + "pointwise/Relu6"); return ConcatLayer(std::move(sg)); } diff --git a/examples/graph_mobilenet_v2.cpp b/examples/graph_mobilenet_v2.cpp index 0d6b4715c9..9bc21c42c5 100644 --- a/examples/graph_mobilenet_v2.cpp +++ b/examples/graph_mobilenet_v2.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. 
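The QuantizationInfo(scale, offset) tables used throughout the QASYMM8 path above follow the usual asymmetric 8-bit scheme: a real value r is stored as an unsigned byte q with r = scale * (q - offset). A standalone sketch of that mapping, using the in_quant_info values from this example; the helper functions below are illustrative, not library code:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Illustrative only: the QASYMM8 real <-> uint8 mapping implied by the
// QuantizationInfo(scale, offset) pairs in these examples.
static std::uint8_t quantize_qasymm8(float r, float scale, std::int32_t offset)
{
    const std::int32_t q = static_cast<std::int32_t>(std::lround(r / scale)) + offset;
    return static_cast<std::uint8_t>(std::min(255, std::max(0, q)));
}

static float dequantize_qasymm8(std::uint8_t q, float scale, std::int32_t offset)
{
    return scale * (static_cast<std::int32_t>(q) - offset);
}

int main()
{
    // in_quant_info is QuantizationInfo(0.0078125f, 128): q in [0, 255]
    // covers roughly [-1.0f, 0.9921875f] in steps of 1/128.
    const float        scale  = 0.0078125f;
    const std::int32_t offset = 128;
    std::printf("q=0   -> r=%f\n", dequantize_qasymm8(0, scale, offset));   // -1.0
    std::printf("q=255 -> r=%f\n", dequantize_qasymm8(255, scale, offset)); //  0.9921875
    std::printf("r=0.5 -> q=%d\n", static_cast<int>(quantize_qasymm8(0.5f, scale, offset))); // 192
    return 0;
}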
*/ #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -36,11 +37,10 @@ using namespace arm_compute::graph_utils; class GraphMobilenetV2Example : public Example { public: - GraphMobilenetV2Example() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "MobileNetV2") + GraphMobilenetV2Example() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "MobileNetV2") { } - GraphMobilenetV2Example(const GraphMobilenetV2Example &) = delete; + GraphMobilenetV2Example(const GraphMobilenetV2Example &) = delete; GraphMobilenetV2Example &operator=(const GraphMobilenetV2Example &) = delete; ~GraphMobilenetV2Example() override = default; @@ -54,7 +54,7 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; @@ -64,15 +64,16 @@ public: std::cout << common_params << std::endl; // Create input descriptor - const TensorShape tensor_shape = permute_shape(TensorShape(224U, 224U, 3U, 1U), DataLayout::NCHW, common_params.data_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); + const TensorShape tensor_shape = permute_shape(TensorShape(224U, 224U, 3U, common_params.batches), + DataLayout::NCHW, common_params.data_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); // Set graph hints - graph << common_params.target - << common_params.fast_math_hint; + graph << common_params.target << common_params.fast_math_hint; // Create core graph - if(arm_compute::is_data_type_float(common_params.data_type)) + if (arm_compute::is_data_type_float(common_params.data_type)) { create_graph_float(input_descriptor); } @@ -82,8 +83,7 @@ public: } // Create common tail graph << ReshapeLayer(TensorShape(1001U)).set_name("Predictions/Reshape") - << SoftmaxLayer().set_name("Predictions/Softmax") - << OutputLayer(get_output_accessor(common_params, 5)); + << SoftmaxLayer().set_name("Predictions/Softmax") << OutputLayer(get_output_accessor(common_params, 5)); // Finalize graph GraphConfig config; @@ -91,6 +91,7 @@ public: config.use_tuner = common_params.enable_tuner; config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; graph.finalize(common_params.target, config); @@ -129,129 +130,149 @@ private: const std::string model_path = "/cnn_data/mobilenet_v2_1.0_224_model/"; // Create a preprocessor object - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<TFPreproccessor>(); + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<TFPreproccessor>(); // Get trainable parameters data path std::string data_path = common_params.data_path; // Add model path to data path - if(!data_path.empty()) + if (!data_path.empty()) { data_path += model_path; } graph << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), false)) - << ConvolutionLayer(3U, 3U, 32U, - get_weights_accessor(data_path, "Conv_weights.npy", DataLayout::NCHW), + << ConvolutionLayer(3U, 3U, 32U, get_weights_accessor(data_path, "Conv_weights.npy", DataLayout::NCHW), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 1, 0, 1, 
DimensionRoundingType::CEIL)) - .set_name("Conv") + .set_name("Conv") << BatchNormalizationLayer(get_weights_accessor(data_path, "Conv_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, "Conv_BatchNorm_moving_variance.npy"), get_weights_accessor(data_path, "Conv_BatchNorm_gamma.npy"), get_weights_accessor(data_path, "Conv_BatchNorm_beta.npy"), 0.0010000000474974513f) - .set_name("Conv/BatchNorm") + .set_name("Conv/BatchNorm") << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)) - .set_name("Conv/Relu6"); + .set_name("Conv/Relu6"); get_expanded_conv_float(data_path, "expanded_conv", 32U, 16U, PadStrideInfo(1, 1, 1, 1)); - get_expanded_conv_float(data_path, "expanded_conv_1", 16U, 24U, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), HasExpand::Yes); - get_expanded_conv_float(data_path, "expanded_conv_2", 24U, 24U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, IsResidual::Yes); - get_expanded_conv_float(data_path, "expanded_conv_3", 24U, 32U, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), HasExpand::Yes); - get_expanded_conv_float(data_path, "expanded_conv_4", 32U, 32U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, IsResidual::Yes); - get_expanded_conv_float(data_path, "expanded_conv_5", 32U, 32U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, IsResidual::Yes); - get_expanded_conv_float(data_path, "expanded_conv_6", 32U, 64U, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), HasExpand::Yes); - get_expanded_conv_float(data_path, "expanded_conv_7", 64U, 64U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, IsResidual::Yes); - get_expanded_conv_float(data_path, "expanded_conv_8", 64U, 64U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, IsResidual::Yes); - get_expanded_conv_float(data_path, "expanded_conv_9", 64U, 64U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, IsResidual::Yes); + get_expanded_conv_float(data_path, "expanded_conv_1", 16U, 24U, + PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), HasExpand::Yes); + get_expanded_conv_float(data_path, "expanded_conv_2", 24U, 24U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, + IsResidual::Yes); + get_expanded_conv_float(data_path, "expanded_conv_3", 24U, 32U, + PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), HasExpand::Yes); + get_expanded_conv_float(data_path, "expanded_conv_4", 32U, 32U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, + IsResidual::Yes); + get_expanded_conv_float(data_path, "expanded_conv_5", 32U, 32U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, + IsResidual::Yes); + get_expanded_conv_float(data_path, "expanded_conv_6", 32U, 64U, + PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), HasExpand::Yes); + get_expanded_conv_float(data_path, "expanded_conv_7", 64U, 64U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, + IsResidual::Yes); + get_expanded_conv_float(data_path, "expanded_conv_8", 64U, 64U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, + IsResidual::Yes); + get_expanded_conv_float(data_path, "expanded_conv_9", 64U, 64U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, + IsResidual::Yes); get_expanded_conv_float(data_path, "expanded_conv_10", 64U, 96U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes); - get_expanded_conv_float(data_path, "expanded_conv_11", 96U, 96U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, IsResidual::Yes); - get_expanded_conv_float(data_path, "expanded_conv_12", 96U, 96U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, IsResidual::Yes); - get_expanded_conv_float(data_path, "expanded_conv_13", 96U, 160U, PadStrideInfo(2, 2, 0, 1, 
0, 1, DimensionRoundingType::CEIL), HasExpand::Yes); - get_expanded_conv_float(data_path, "expanded_conv_14", 160U, 160U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, IsResidual::Yes); - get_expanded_conv_float(data_path, "expanded_conv_15", 160U, 160U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, IsResidual::Yes); + get_expanded_conv_float(data_path, "expanded_conv_11", 96U, 96U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, + IsResidual::Yes); + get_expanded_conv_float(data_path, "expanded_conv_12", 96U, 96U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, + IsResidual::Yes); + get_expanded_conv_float(data_path, "expanded_conv_13", 96U, 160U, + PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), HasExpand::Yes); + get_expanded_conv_float(data_path, "expanded_conv_14", 160U, 160U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, + IsResidual::Yes); + get_expanded_conv_float(data_path, "expanded_conv_15", 160U, 160U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, + IsResidual::Yes); get_expanded_conv_float(data_path, "expanded_conv_16", 160U, 320U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes); - graph << ConvolutionLayer(1U, 1U, 1280U, - get_weights_accessor(data_path, "Conv_1_weights.npy", DataLayout::NCHW), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Conv_1") + graph << ConvolutionLayer( + 1U, 1U, 1280U, get_weights_accessor(data_path, "Conv_1_weights.npy", DataLayout::NCHW), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Conv_1") << BatchNormalizationLayer(get_weights_accessor(data_path, "Conv_1_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, "Conv_1_BatchNorm_moving_variance.npy"), get_weights_accessor(data_path, "Conv_1_BatchNorm_gamma.npy"), get_weights_accessor(data_path, "Conv_1_BatchNorm_beta.npy"), 0.0010000000474974513f) - .set_name("Conv_1/BatchNorm") + .set_name("Conv_1/BatchNorm") << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)) - .set_name("Conv_1/Relu6") + .set_name("Conv_1/Relu6") << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, common_params.data_layout)).set_name("Logits/AvgPool") << ConvolutionLayer(1U, 1U, 1001U, get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_weights.npy", DataLayout::NCHW), get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_biases.npy"), PadStrideInfo(1, 1, 0, 0)) - .set_name("Logits/Conv2d_1c_1x1"); + .set_name("Logits/Conv2d_1c_1x1"); } - void get_expanded_conv_float(const std::string &data_path, std::string &&param_path, - unsigned int input_channels, unsigned int output_channels, - PadStrideInfo dwc_pad_stride_info, - HasExpand has_expand = HasExpand::No, IsResidual is_residual = IsResidual::No, - unsigned int expansion_size = 6) + void get_expanded_conv_float(const std::string &data_path, + std::string &&param_path, + unsigned int input_channels, + unsigned int output_channels, + PadStrideInfo dwc_pad_stride_info, + HasExpand has_expand = HasExpand::No, + IsResidual is_residual = IsResidual::No, + unsigned int expansion_size = 6) { std::string total_path = param_path + "_"; SubStream left(graph); // Add expand node - if(has_expand == HasExpand::Yes) + if (has_expand == HasExpand::Yes) { - left << ConvolutionLayer(1U, 1U, input_channels * expansion_size, - get_weights_accessor(data_path, total_path + "expand_weights.npy", DataLayout::NCHW), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/expand/Conv2D") - 
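get_expanded_conv_float, whose reflowed signature appears above, builds MobileNetV2's inverted-residual bottleneck: an optional 1x1 expand convolution (six-fold by default), a 3x3 depthwise convolution, and a linear 1x1 projection, with an elementwise Add back to the block input when stride and channel count are preserved. A trimmed sketch of that wiring, with null accessors in place of the .npy loaders and BatchNorm omitted; the helper name is illustrative:

#include "arm_compute/graph.h"

using namespace arm_compute;
using namespace arm_compute::graph::frontend;

// Illustrative only: the inverted-residual pattern behind get_expanded_conv_float.
// The projection is linear (no ReLU6 after it); the residual Add is taken only
// when the block keeps stride 1 and the same channel count.
void inverted_residual_sketch(Stream &graph, unsigned int in_ch, unsigned int out_ch,
                              PadStrideInfo dwc_info, bool with_residual, unsigned int expansion = 6)
{
    SubStream left(graph);
    left << ConvolutionLayer(1U, 1U, in_ch * expansion, // 1x1 expand
                             std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                             std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
         << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f))
         << DepthwiseConvolutionLayer(3U, 3U, // 3x3 depthwise
                                      std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                                      std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), dwc_info)
         << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f))
         << ConvolutionLayer(1U, 1U, out_ch, // 1x1 linear projection
                             std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                             std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0));
    if (with_residual)
    {
        SubStream right(graph); // identity branch from the block input
        graph << EltwiseLayer(std::move(left), std::move(right), EltwiseOperation::Add);
    }
    // Non-residual blocks simply continue the stream from `left`, as the example does.
}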
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "expand_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "expand_BatchNorm_moving_variance.npy"), - get_weights_accessor(data_path, total_path + "expand_BatchNorm_gamma.npy"), - get_weights_accessor(data_path, total_path + "expand_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name(param_path + "/expand/BatchNorm") + left << ConvolutionLayer( + 1U, 1U, input_channels * expansion_size, + get_weights_accessor(data_path, total_path + "expand_weights.npy", DataLayout::NCHW), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/expand/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "expand_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "expand_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, total_path + "expand_BatchNorm_gamma.npy"), + get_weights_accessor(data_path, total_path + "expand_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name(param_path + "/expand/BatchNorm") << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)) - .set_name(param_path + "/expand/Relu6"); + .set_name(param_path + "/expand/Relu6"); } // Add depthwise node - left << DepthwiseConvolutionLayer(3U, 3U, - get_weights_accessor(data_path, total_path + "depthwise_depthwise_weights.npy", DataLayout::NCHW), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - dwc_pad_stride_info) - .set_name(param_path + "/depthwise/depthwise") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_moving_variance.npy"), - get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_gamma.npy"), - get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name(param_path + "/depthwise/BatchNorm") + left << DepthwiseConvolutionLayer( + 3U, 3U, + get_weights_accessor(data_path, total_path + "depthwise_depthwise_weights.npy", DataLayout::NCHW), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), dwc_pad_stride_info) + .set_name(param_path + "/depthwise/depthwise") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_gamma.npy"), + get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name(param_path + "/depthwise/BatchNorm") << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)) - .set_name(param_path + "/depthwise/Relu6"); + .set_name(param_path + "/depthwise/Relu6"); // Add project node left << ConvolutionLayer(1U, 1U, output_channels, get_weights_accessor(data_path, total_path + "project_weights.npy", DataLayout::NCHW), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/project/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "project_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "project_BatchNorm_moving_variance.npy"), - get_weights_accessor(data_path, total_path + 
"project_BatchNorm_gamma.npy"), - get_weights_accessor(data_path, total_path + "project_BatchNorm_beta.npy"), - 0.0010000000474974513) - .set_name(param_path + "/project/BatchNorm"); - - if(is_residual == IsResidual::Yes) + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/project/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "project_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "project_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, total_path + "project_BatchNorm_gamma.npy"), + get_weights_accessor(data_path, total_path + "project_BatchNorm_beta.npy"), 0.0010000000474974513) + .set_name(param_path + "/project/BatchNorm"); + + if (is_residual == IsResidual::Yes) { // Add residual node SubStream right(graph); - graph << EltwiseLayer(std::move(left), std::move(right), EltwiseOperation::Add).set_name(param_path + "/add"); + graph + << EltwiseLayer(std::move(left), std::move(right), EltwiseOperation::Add).set_name(param_path + "/add"); } else { @@ -268,7 +289,7 @@ private: std::string data_path = common_params.data_path; // Add model path to data path - if(!data_path.empty()) + if (!data_path.empty()) { data_path += model_path; } @@ -276,16 +297,14 @@ private: const QuantizationInfo in_quant_info = QuantizationInfo(0.0078125f, 128); const QuantizationInfo mid_quant_info = QuantizationInfo(0.023528477177023888f, 128); - const std::vector<QuantizationInfo> conv_weights_quant_info = - { + const std::vector<QuantizationInfo> conv_weights_quant_info = { QuantizationInfo(0.03396892547607422f, 122), // Conv QuantizationInfo(0.005167067516595125f, 125), // Conv1 QuantizationInfo(0.0016910821432247758f, 113) // Conv2d_1c_1x1 }; // Pointwise expand convolution quantization info - const std::vector<QuantizationInfo> pwc_q = - { + const std::vector<QuantizationInfo> pwc_q = { QuantizationInfo(0.254282623529f, 129), // expand_0 (Dummy) QuantizationInfo(0.009758507832884789f, 127), // expand_1 QuantizationInfo(0.0036556976847350597f, 144), // expand_2 @@ -305,8 +324,7 @@ private: QuantizationInfo(0.002046825597062707f, 135) // expand_16 }; // Depthwise expand convolution quantization info - const std::vector<QuantizationInfo> dwc_q = - { + const std::vector<QuantizationInfo> dwc_q = { QuantizationInfo(0.3436955213546753f, 165), // expand_0 QuantizationInfo(0.020969120785593987f, 109), // expand_1 QuantizationInfo(0.16981913149356842f, 52), // expand_2 @@ -326,8 +344,7 @@ private: QuantizationInfo(0.16456253826618195, 201) // expand_16 }; // Project convolution quantization info - const std::vector<QuantizationInfo> prwc_q = - { + const std::vector<QuantizationInfo> prwc_q = { QuantizationInfo(0.03737175464630127f, 140), // expand_0 QuantizationInfo(0.0225360207259655f, 156), // expand_1 QuantizationInfo(0.02740888111293316f, 122), // expand_2 @@ -349,65 +366,84 @@ private: graph << InputLayer(input_descriptor.set_quantization_info(in_quant_info), get_weights_accessor(data_path, common_params.image)) - << ConvolutionLayer( - 3U, 3U, 32U, - get_weights_accessor(data_path, "Conv_weights.npy"), - get_weights_accessor(data_path, "Conv_bias.npy"), - PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), - 1, conv_weights_quant_info.at(0), mid_quant_info) - .set_name("Conv") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)).set_name("Conv/Relu6") - << DepthwiseConvolutionLayer(3U, 3U, - 
get_weights_accessor(data_path, "expanded_conv_depthwise_depthwise_weights.npy"), - get_weights_accessor(data_path, "expanded_conv_depthwise_depthwise_biases.npy"), - PadStrideInfo(1, 1, 1, 1), 1, dwc_q.at(0)) - .set_name("expanded_conv/depthwise/depthwise") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)).set_name("expanded_conv/depthwise/Relu6") - << ConvolutionLayer(1U, 1U, 16U, - get_weights_accessor(data_path, "expanded_conv_project_weights.npy"), + << ConvolutionLayer(3U, 3U, 32U, get_weights_accessor(data_path, "Conv_weights.npy"), + get_weights_accessor(data_path, "Conv_bias.npy"), + PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), 1, + conv_weights_quant_info.at(0), mid_quant_info) + .set_name("Conv") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)) + .set_name("Conv/Relu6") + << DepthwiseConvolutionLayer( + 3U, 3U, get_weights_accessor(data_path, "expanded_conv_depthwise_depthwise_weights.npy"), + get_weights_accessor(data_path, "expanded_conv_depthwise_depthwise_biases.npy"), + PadStrideInfo(1, 1, 1, 1), 1, dwc_q.at(0)) + .set_name("expanded_conv/depthwise/depthwise") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)) + .set_name("expanded_conv/depthwise/Relu6") + << ConvolutionLayer(1U, 1U, 16U, get_weights_accessor(data_path, "expanded_conv_project_weights.npy"), get_weights_accessor(data_path, "expanded_conv_project_biases.npy"), PadStrideInfo(1, 1, 0, 0), 1, prwc_q.at(0)) - .set_name("expanded_conv/project/Conv2D"); - - get_expanded_conv_qasymm8(data_path, "expanded_conv_1", IsResidual::No, 96U, 24U, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), - pwc_q.at(1), dwc_q.at(1), prwc_q.at(1)); - get_expanded_conv_qasymm8(data_path, "expanded_conv_2", IsResidual::Yes, 144U, 24U, PadStrideInfo(1, 1, 1, 1), pwc_q.at(2), dwc_q.at(2), prwc_q.at(2)); - get_expanded_conv_qasymm8(data_path, "expanded_conv_3", IsResidual::No, 144U, 32U, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), - pwc_q.at(3), dwc_q.at(3), prwc_q.at(3)); - get_expanded_conv_qasymm8(data_path, "expanded_conv_4", IsResidual::Yes, 192U, 32U, PadStrideInfo(1, 1, 1, 1), pwc_q.at(4), dwc_q.at(4), prwc_q.at(4)); - get_expanded_conv_qasymm8(data_path, "expanded_conv_5", IsResidual::Yes, 192U, 32U, PadStrideInfo(1, 1, 1, 1), pwc_q.at(5), dwc_q.at(5), prwc_q.at(5)); - get_expanded_conv_qasymm8(data_path, "expanded_conv_6", IsResidual::No, 192U, 64U, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), - pwc_q.at(6), dwc_q.at(6), prwc_q.at(6)); - get_expanded_conv_qasymm8(data_path, "expanded_conv_7", IsResidual::Yes, 384U, 64U, PadStrideInfo(1, 1, 1, 1), pwc_q.at(7), dwc_q.at(7), prwc_q.at(7)); - get_expanded_conv_qasymm8(data_path, "expanded_conv_8", IsResidual::Yes, 384U, 64U, PadStrideInfo(1, 1, 1, 1), pwc_q.at(8), dwc_q.at(8), prwc_q.at(8)); - get_expanded_conv_qasymm8(data_path, "expanded_conv_9", IsResidual::Yes, 384U, 64U, PadStrideInfo(1, 1, 1, 1), pwc_q.at(9), dwc_q.at(9), prwc_q.at(9)); - get_expanded_conv_qasymm8(data_path, "expanded_conv_10", IsResidual::No, 384U, 96U, PadStrideInfo(1, 1, 1, 1), pwc_q.at(10), dwc_q.at(10), prwc_q.at(10)); - get_expanded_conv_qasymm8(data_path, "expanded_conv_11", IsResidual::Yes, 576U, 96U, PadStrideInfo(1, 1, 1, 1), pwc_q.at(11), dwc_q.at(11), prwc_q.at(11)); - get_expanded_conv_qasymm8(data_path, "expanded_conv_12", IsResidual::Yes, 576U, 96U, PadStrideInfo(1, 
1, 1, 1), pwc_q.at(12), dwc_q.at(12), prwc_q.at(12)); - get_expanded_conv_qasymm8(data_path, "expanded_conv_13", IsResidual::No, 576U, 160U, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), - pwc_q.at(13), dwc_q.at(13), prwc_q.at(13)); - get_expanded_conv_qasymm8(data_path, "expanded_conv_14", IsResidual::Yes, 960U, 160U, PadStrideInfo(1, 1, 1, 1), pwc_q.at(14), dwc_q.at(14), prwc_q.at(14)); - get_expanded_conv_qasymm8(data_path, "expanded_conv_15", IsResidual::Yes, 960U, 160U, PadStrideInfo(1, 1, 1, 1), pwc_q.at(15), dwc_q.at(15), prwc_q.at(15)); - get_expanded_conv_qasymm8(data_path, "expanded_conv_16", IsResidual::No, 960U, 320U, PadStrideInfo(1, 1, 1, 1), pwc_q.at(16), dwc_q.at(16), prwc_q.at(16)); - - graph << ConvolutionLayer(1U, 1U, 1280U, - get_weights_accessor(data_path, "Conv_1_weights.npy"), - get_weights_accessor(data_path, "Conv_1_biases.npy"), - PadStrideInfo(1, 1, 0, 0), 1, conv_weights_quant_info.at(1)) - .set_name("Conv_1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)).set_name("Conv_1/Relu6") + .set_name("expanded_conv/project/Conv2D"); + + get_expanded_conv_qasymm8(data_path, "expanded_conv_1", IsResidual::No, 96U, 24U, + PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), pwc_q.at(1), + dwc_q.at(1), prwc_q.at(1)); + get_expanded_conv_qasymm8(data_path, "expanded_conv_2", IsResidual::Yes, 144U, 24U, PadStrideInfo(1, 1, 1, 1), + pwc_q.at(2), dwc_q.at(2), prwc_q.at(2)); + get_expanded_conv_qasymm8(data_path, "expanded_conv_3", IsResidual::No, 144U, 32U, + PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), pwc_q.at(3), + dwc_q.at(3), prwc_q.at(3)); + get_expanded_conv_qasymm8(data_path, "expanded_conv_4", IsResidual::Yes, 192U, 32U, PadStrideInfo(1, 1, 1, 1), + pwc_q.at(4), dwc_q.at(4), prwc_q.at(4)); + get_expanded_conv_qasymm8(data_path, "expanded_conv_5", IsResidual::Yes, 192U, 32U, PadStrideInfo(1, 1, 1, 1), + pwc_q.at(5), dwc_q.at(5), prwc_q.at(5)); + get_expanded_conv_qasymm8(data_path, "expanded_conv_6", IsResidual::No, 192U, 64U, + PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), pwc_q.at(6), + dwc_q.at(6), prwc_q.at(6)); + get_expanded_conv_qasymm8(data_path, "expanded_conv_7", IsResidual::Yes, 384U, 64U, PadStrideInfo(1, 1, 1, 1), + pwc_q.at(7), dwc_q.at(7), prwc_q.at(7)); + get_expanded_conv_qasymm8(data_path, "expanded_conv_8", IsResidual::Yes, 384U, 64U, PadStrideInfo(1, 1, 1, 1), + pwc_q.at(8), dwc_q.at(8), prwc_q.at(8)); + get_expanded_conv_qasymm8(data_path, "expanded_conv_9", IsResidual::Yes, 384U, 64U, PadStrideInfo(1, 1, 1, 1), + pwc_q.at(9), dwc_q.at(9), prwc_q.at(9)); + get_expanded_conv_qasymm8(data_path, "expanded_conv_10", IsResidual::No, 384U, 96U, PadStrideInfo(1, 1, 1, 1), + pwc_q.at(10), dwc_q.at(10), prwc_q.at(10)); + get_expanded_conv_qasymm8(data_path, "expanded_conv_11", IsResidual::Yes, 576U, 96U, PadStrideInfo(1, 1, 1, 1), + pwc_q.at(11), dwc_q.at(11), prwc_q.at(11)); + get_expanded_conv_qasymm8(data_path, "expanded_conv_12", IsResidual::Yes, 576U, 96U, PadStrideInfo(1, 1, 1, 1), + pwc_q.at(12), dwc_q.at(12), prwc_q.at(12)); + get_expanded_conv_qasymm8(data_path, "expanded_conv_13", IsResidual::No, 576U, 160U, + PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), pwc_q.at(13), + dwc_q.at(13), prwc_q.at(13)); + get_expanded_conv_qasymm8(data_path, "expanded_conv_14", IsResidual::Yes, 960U, 160U, PadStrideInfo(1, 1, 1, 1), + pwc_q.at(14), dwc_q.at(14), prwc_q.at(14)); + get_expanded_conv_qasymm8(data_path, "expanded_conv_15", 
IsResidual::Yes, 960U, 160U, PadStrideInfo(1, 1, 1, 1), + pwc_q.at(15), dwc_q.at(15), prwc_q.at(15)); + get_expanded_conv_qasymm8(data_path, "expanded_conv_16", IsResidual::No, 960U, 320U, PadStrideInfo(1, 1, 1, 1), + pwc_q.at(16), dwc_q.at(16), prwc_q.at(16)); + + graph << ConvolutionLayer(1U, 1U, 1280U, get_weights_accessor(data_path, "Conv_1_weights.npy"), + get_weights_accessor(data_path, "Conv_1_biases.npy"), PadStrideInfo(1, 1, 0, 0), 1, + conv_weights_quant_info.at(1)) + .set_name("Conv_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)) + .set_name("Conv_1/Relu6") << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, common_params.data_layout)).set_name("Logits/AvgPool") - << ConvolutionLayer(1U, 1U, 1001U, - get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_weights.npy"), + << ConvolutionLayer(1U, 1U, 1001U, get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_weights.npy"), get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_biases.npy"), PadStrideInfo(1, 1, 0, 0), 1, conv_weights_quant_info.at(2)) - .set_name("Logits/Conv2d_1c_1x1"); + .set_name("Logits/Conv2d_1c_1x1"); } - void get_expanded_conv_qasymm8(const std::string &data_path, std::string &&param_path, IsResidual is_residual, - unsigned int input_channels, unsigned int output_channels, + void get_expanded_conv_qasymm8(const std::string &data_path, + std::string &&param_path, + IsResidual is_residual, + unsigned int input_channels, + unsigned int output_channels, PadStrideInfo dwc_pad_stride_info, - const QuantizationInfo &pwi, const QuantizationInfo &dwi, const QuantizationInfo &pji) + const QuantizationInfo &pwi, + const QuantizationInfo &dwi, + const QuantizationInfo &pji) { std::string total_path = param_path + "_"; @@ -416,25 +452,28 @@ private: get_weights_accessor(data_path, total_path + "project_weights.npy"), get_weights_accessor(data_path, total_path + "project_biases.npy"), PadStrideInfo(1, 1, 0, 0), 1, pwi) - .set_name(param_path + "/Conv2D") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)).set_name(param_path + "/Conv2D/Relu6") - << DepthwiseConvolutionLayer(3U, 3U, - get_weights_accessor(data_path, total_path + "depthwise_depthwise_weights.npy"), - get_weights_accessor(data_path, total_path + "depthwise_depthwise_biases.npy"), - dwc_pad_stride_info, 1, dwi) - .set_name(param_path + "/depthwise/depthwise") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)).set_name(param_path + "/depthwise/Relu6") + .set_name(param_path + "/Conv2D") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)) + .set_name(param_path + "/Conv2D/Relu6") + << DepthwiseConvolutionLayer( + 3U, 3U, get_weights_accessor(data_path, total_path + "depthwise_depthwise_weights.npy"), + get_weights_accessor(data_path, total_path + "depthwise_depthwise_biases.npy"), dwc_pad_stride_info, + 1, dwi) + .set_name(param_path + "/depthwise/depthwise") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)) + .set_name(param_path + "/depthwise/Relu6") << ConvolutionLayer(1U, 1U, output_channels, get_weights_accessor(data_path, total_path + "project_weights.npy"), get_weights_accessor(data_path, total_path + "project_biases.npy"), PadStrideInfo(1, 1, 0, 0), 1, pji) - .set_name(param_path + "/project/Conv2D"); + .set_name(param_path + "/project/Conv2D"); - if(is_residual == IsResidual::Yes) + if 
(is_residual == IsResidual::Yes) { // Add residual node SubStream right(graph); - graph << EltwiseLayer(std::move(left), std::move(right), EltwiseOperation::Add).set_name(param_path + "/add"); + graph + << EltwiseLayer(std::move(left), std::move(right), EltwiseOperation::Add).set_name(param_path + "/add"); } else { diff --git a/examples/graph_resnet12.cpp b/examples/graph_resnet12.cpp index 120cc9b755..80db826be5 100644 --- a/examples/graph_resnet12.cpp +++ b/examples/graph_resnet12.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. */ #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -36,7 +37,12 @@ class GraphResNet12Example : public Example { public: GraphResNet12Example() - : cmd_parser(), common_opts(cmd_parser), model_input_width(nullptr), model_input_height(nullptr), common_params(), graph(0, "ResNet12") + : cmd_parser(), + common_opts(cmd_parser), + model_input_width(nullptr), + model_input_height(nullptr), + common_params(), + graph(0, "ResNet12") { model_input_width = cmd_parser.add_option<SimpleOption<unsigned int>>("image-width", 192); model_input_height = cmd_parser.add_option<SimpleOption<unsigned int>>("image-height", 128); @@ -45,7 +51,7 @@ public: model_input_width->set_help("Input image width."); model_input_height->set_help("Input image height."); } - GraphResNet12Example(const GraphResNet12Example &) = delete; + GraphResNet12Example(const GraphResNet12Example &) = delete; GraphResNet12Example &operator=(const GraphResNet12Example &) = delete; ~GraphResNet12Example() override = default; bool do_setup(int argc, char **argv) override @@ -58,7 +64,7 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; @@ -69,7 +75,8 @@ public: const unsigned int image_height = model_input_height->value(); // Checks - ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "QASYMM8 not supported for this graph"); + ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), + "QASYMM8 not supported for this graph"); // Print parameter values std::cout << common_params << std::endl; @@ -81,54 +88,51 @@ public: const std::string model_path = "/cnn_data/resnet12_model/"; // Create a preprocessor object - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<TFPreproccessor>(); + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<TFPreproccessor>(); // Create input descriptor - const TensorShape tensor_shape = permute_shape(TensorShape(image_width, image_height, 3U, 1U), DataLayout::NCHW, common_params.data_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(image_width, image_height, 3U, common_params.batches), DataLayout::NCHW, + common_params.data_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NCHW; - graph << common_params.target - << common_params.fast_math_hint - << 
InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), false /* Do not convert to BGR */)) - << ConvolutionLayer( - 9U, 9U, 64U, - get_weights_accessor(data_path, "conv1_weights.npy", weights_layout), - get_weights_accessor(data_path, "conv1_biases.npy", weights_layout), - PadStrideInfo(1, 1, 4, 4)) - .set_name("conv1/convolution") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1/Relu"); + graph << common_params.target << common_params.fast_math_hint + << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), + false /* Do not convert to BGR */)) + << ConvolutionLayer(9U, 9U, 64U, get_weights_accessor(data_path, "conv1_weights.npy", weights_layout), + get_weights_accessor(data_path, "conv1_biases.npy", weights_layout), + PadStrideInfo(1, 1, 4, 4)) + .set_name("conv1/convolution") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv1/Relu"); add_residual_block(data_path, "block1", weights_layout); add_residual_block(data_path, "block2", weights_layout); add_residual_block(data_path, "block3", weights_layout); add_residual_block(data_path, "block4", weights_layout); - graph << ConvolutionLayer( - 3U, 3U, 64U, - get_weights_accessor(data_path, "conv10_weights.npy", weights_layout), - get_weights_accessor(data_path, "conv10_biases.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv10/convolution") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv10/Relu") - << ConvolutionLayer( - 3U, 3U, 64U, - get_weights_accessor(data_path, "conv11_weights.npy", weights_layout), - get_weights_accessor(data_path, "conv11_biases.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv11/convolution") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv11/Relu") - << ConvolutionLayer( - 9U, 9U, 3U, - get_weights_accessor(data_path, "conv12_weights.npy", weights_layout), - get_weights_accessor(data_path, "conv12_biases.npy"), - PadStrideInfo(1, 1, 4, 4)) - .set_name("conv12/convolution") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH)).set_name("conv12/Tanh") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 0.58f, 0.5f)).set_name("conv12/Linear") - << OutputLayer(arm_compute::support::cpp14::make_unique<DummyAccessor>(0)); + graph << ConvolutionLayer(3U, 3U, 64U, get_weights_accessor(data_path, "conv10_weights.npy", weights_layout), + get_weights_accessor(data_path, "conv10_biases.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv10/convolution") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv10/Relu") + << ConvolutionLayer(3U, 3U, 64U, get_weights_accessor(data_path, "conv11_weights.npy", weights_layout), + get_weights_accessor(data_path, "conv11_biases.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv11/convolution") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv11/Relu") + << ConvolutionLayer(9U, 9U, 3U, get_weights_accessor(data_path, "conv12_weights.npy", weights_layout), + get_weights_accessor(data_path, "conv12_biases.npy"), PadStrideInfo(1, 1, 4, 4)) + .set_name("conv12/convolution") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH)) + .set_name("conv12/Tanh") + << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 0.58f, 0.5f)) + .set_name("conv12/Linear") + << OutputLayer(std::make_unique<DummyAccessor>(0)); // Finalize graph GraphConfig config; @@ -136,6 +140,7 @@ public: config.use_tuner = common_params.enable_tuner; config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; graph.finalize(common_params.target, config); @@ -151,8 +156,8 @@ public: private: CommandLineParser cmd_parser; CommonGraphOptions common_opts; - SimpleOption<unsigned int> *model_input_width{ nullptr }; - SimpleOption<unsigned int> *model_input_height{ nullptr }; + SimpleOption<unsigned int> *model_input_width{nullptr}; + SimpleOption<unsigned int> *model_input_height{nullptr}; CommonGraphParams common_params; Stream graph; @@ -169,35 +174,33 @@ private: SubStream left(graph); SubStream right(graph); - right << ConvolutionLayer( - 3U, 3U, 64U, - get_weights_accessor(data_path, unit_path + "conv1_weights.npy", weights_layout), - get_weights_accessor(data_path, unit_path + "conv1_biases.npy", weights_layout), - PadStrideInfo(1, 1, 1, 1)) - .set_name(unit_name + "conv1/convolution") + right << ConvolutionLayer(3U, 3U, 64U, + get_weights_accessor(data_path, unit_path + "conv1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "conv1_biases.npy", weights_layout), + PadStrideInfo(1, 1, 1, 1)) + .set_name(unit_name + "conv1/convolution") << BatchNormalizationLayer( - get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_moving_variance.npy"), - get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_gamma.npy"), - get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_beta.npy"), - 0.0000100099996416f) - .set_name(unit_name + "conv1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "conv1/Relu") - - << ConvolutionLayer( - 3U, 3U, 64U, - get_weights_accessor(data_path, unit_path + "conv2_weights.npy", weights_layout), - get_weights_accessor(data_path, unit_path + "conv2_biases.npy", weights_layout), - PadStrideInfo(1, 1, 1, 1)) - .set_name(unit_name + "conv2/convolution") + get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_gamma.npy"), + get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_beta.npy"), 0.0000100099996416f) + .set_name(unit_name + "conv1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "conv1/Relu") + + << ConvolutionLayer(3U, 3U, 64U, + get_weights_accessor(data_path, unit_path + "conv2_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "conv2_biases.npy", weights_layout), + PadStrideInfo(1, 1, 1, 1)) + .set_name(unit_name + "conv2/convolution") << BatchNormalizationLayer( - get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_moving_variance.npy"), - get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_gamma.npy"), - get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_beta.npy"), - 0.0000100099996416f) - .set_name(unit_name + "conv2/BatchNorm") - << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "conv2/Relu"); + get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_gamma.npy"), + get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_beta.npy"), 0.0000100099996416f) + .set_name(unit_name + "conv2/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "conv2/Relu"); graph << EltwiseLayer(std::move(left), std::move(right), EltwiseOperation::Add).set_name(unit_name + "add"); } diff --git a/examples/graph_resnet50.cpp b/examples/graph_resnet50.cpp index 7af058e042..ba0f0d5fb6 100644 --- a/examples/graph_resnet50.cpp +++ b/examples/graph_resnet50.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. */ #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -35,8 +36,7 @@ using namespace arm_compute::graph_utils; class GraphResNetV1_50Example : public Example { public: - GraphResNetV1_50Example() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "ResNetV1_50") + GraphResNetV1_50Example() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "ResNetV1_50") { } bool do_setup(int argc, char **argv) override @@ -49,7 +49,7 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; @@ -62,36 +62,40 @@ public: std::string data_path = common_params.data_path; // Create a preprocessor object - const std::array<float, 3> mean_rgb{ { 122.68f, 116.67f, 104.01f } }; - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb, - false /* Do not convert to BGR */); + const std::array<float, 3> mean_rgb{{122.68f, 116.67f, 104.01f}}; + std::unique_ptr<IPreprocessor> preprocessor = + std::make_unique<CaffePreproccessor>(mean_rgb, false /* Do not convert to BGR */); // Create input descriptor const auto operation_layout = common_params.data_layout; - const TensorShape tensor_shape = permute_shape(TensorShape(224U, 224U, 3U, 1U), DataLayout::NCHW, operation_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(224U, 224U, 3U, common_params.batches), DataLayout::NCHW, operation_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NCHW; - graph << common_params.target - << common_params.fast_math_hint - << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), false /* Do not convert to BGR */)) + graph << common_params.target << common_params.fast_math_hint + << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), + false /* Do not convert to BGR */)) << ConvolutionLayer( - 7U, 7U, 64U, - get_weights_accessor(data_path, 
"/cnn_data/resnet50_model/conv1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 3, 3)) - .set_name("conv1/convolution") + 7U, 7U, 64U, + get_weights_accessor(data_path, "/cnn_data/resnet50_model/conv1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 3, 3)) + .set_name("conv1/convolution") << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/resnet50_model/conv1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/resnet50_model/conv1_BatchNorm_moving_variance.npy"), - get_weights_accessor(data_path, "/cnn_data/resnet50_model/conv1_BatchNorm_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/resnet50_model/conv1_BatchNorm_beta.npy"), - 0.0000100099996416f) - .set_name("conv1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1/Relu") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR))).set_name("pool1/MaxPool"); + get_weights_accessor(data_path, "/cnn_data/resnet50_model/conv1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/resnet50_model/conv1_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, "/cnn_data/resnet50_model/conv1_BatchNorm_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/resnet50_model/conv1_BatchNorm_beta.npy"), + 0.0000100099996416f) + .set_name("conv1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv1/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, + PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR))) + .set_name("pool1/MaxPool"); add_residual_block(data_path, "block1", weights_layout, 64, 3, 2); add_residual_block(data_path, "block2", weights_layout, 128, 4, 2); @@ -100,22 +104,23 @@ public: graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, operation_layout)).set_name("pool5") << ConvolutionLayer( - 1U, 1U, 1000U, - get_weights_accessor(data_path, "/cnn_data/resnet50_model/logits_weights.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/resnet50_model/logits_biases.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("logits/convolution") - << FlattenLayer().set_name("predictions/Reshape") - << SoftmaxLayer().set_name("predictions/Softmax") + 1U, 1U, 1000U, + get_weights_accessor(data_path, "/cnn_data/resnet50_model/logits_weights.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/resnet50_model/logits_biases.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("logits/convolution") + << FlattenLayer().set_name("predictions/Reshape") << SoftmaxLayer().set_name("predictions/Softmax") << OutputLayer(get_output_accessor(common_params, 5)); // Finalize graph GraphConfig config; - config.num_threads = common_params.threads; - config.use_tuner = common_params.enable_tuner; - config.tuner_mode = common_params.tuner_mode; - config.tuner_file = common_params.tuner_file; - config.convert_to_uint8 = (common_params.data_type == DataType::QASYMM8); + config.num_threads = common_params.threads; + config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; + config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; + config.use_synthetic_type = 
arm_compute::is_data_type_quantized(common_params.data_type); + config.synthetic_type = common_params.data_type; graph.finalize(common_params.target, config); @@ -134,10 +139,14 @@ private: CommonGraphParams common_params; Stream graph; - void add_residual_block(const std::string &data_path, const std::string &name, DataLayout weights_layout, - unsigned int base_depth, unsigned int num_units, unsigned int stride) + void add_residual_block(const std::string &data_path, + const std::string &name, + DataLayout weights_layout, + unsigned int base_depth, + unsigned int num_units, + unsigned int stride) { - for(unsigned int i = 0; i < num_units; ++i) + for (unsigned int i = 0; i < num_units; ++i) { std::stringstream unit_path_ss; unit_path_ss << "/cnn_data/resnet50_model/" << name << "_unit_" << (i + 1) << "_bottleneck_v1_"; @@ -149,89 +158,90 @@ private: unsigned int middle_stride = 1; - if(i == (num_units - 1)) + if (i == (num_units - 1)) { middle_stride = stride; } SubStream right(graph); - right << ConvolutionLayer( - 1U, 1U, base_depth, - get_weights_accessor(data_path, unit_path + "conv1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "conv1/convolution") + right << ConvolutionLayer(1U, 1U, base_depth, + get_weights_accessor(data_path, unit_path + "conv1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "conv1/convolution") << BatchNormalizationLayer( - get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_moving_variance.npy"), - get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_gamma.npy"), - get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_beta.npy"), - 0.0000100099996416f) - .set_name(unit_name + "conv1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "conv1/Relu") + get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_gamma.npy"), + get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_beta.npy"), 0.0000100099996416f) + .set_name(unit_name + "conv1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "conv1/Relu") - << ConvolutionLayer( - 3U, 3U, base_depth, - get_weights_accessor(data_path, unit_path + "conv2_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(middle_stride, middle_stride, 1, 1)) - .set_name(unit_name + "conv2/convolution") + << ConvolutionLayer(3U, 3U, base_depth, + get_weights_accessor(data_path, unit_path + "conv2_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(middle_stride, middle_stride, 1, 1)) + .set_name(unit_name + "conv2/convolution") << BatchNormalizationLayer( - get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_moving_variance.npy"), - get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_gamma.npy"), - get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_beta.npy"), - 0.0000100099996416f) - 
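The GraphConfig block earlier in this hunk also changes behaviour, not just formatting: the old convert_to_uint8 flag is replaced by the use_synthetic_type/synthetic_type pair, and a new mlgo_file option is threaded through from the common parameters. The finalization sequence, as it now appears in each example:

    GraphConfig config;
    config.num_threads        = common_params.threads;
    config.use_tuner          = common_params.enable_tuner;
    config.tuner_mode         = common_params.tuner_mode;
    config.tuner_file         = common_params.tuner_file;
    config.mlgo_file          = common_params.mlgo_file; // new: MLGO heuristics file
    // Synthetic-type mode replaces the old convert_to_uint8 flag and covers
    // any quantized data type, not only QASYMM8 as before.
    config.use_synthetic_type = arm_compute::is_data_type_quantized(common_params.data_type);
    config.synthetic_type     = common_params.data_type;
    graph.finalize(common_params.target, config);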
.set_name(unit_name + "conv2/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "conv1/Relu") + get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_gamma.npy"), + get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_beta.npy"), 0.0000100099996416f) + .set_name(unit_name + "conv2/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "conv1/Relu") - << ConvolutionLayer( - 1U, 1U, base_depth * 4, - get_weights_accessor(data_path, unit_path + "conv3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "conv3/convolution") + << ConvolutionLayer(1U, 1U, base_depth * 4, + get_weights_accessor(data_path, unit_path + "conv3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "conv3/convolution") << BatchNormalizationLayer( - get_weights_accessor(data_path, unit_path + "conv3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "conv3_BatchNorm_moving_variance.npy"), - get_weights_accessor(data_path, unit_path + "conv3_BatchNorm_gamma.npy"), - get_weights_accessor(data_path, unit_path + "conv3_BatchNorm_beta.npy"), - 0.0000100099996416f) - .set_name(unit_name + "conv2/BatchNorm"); + get_weights_accessor(data_path, unit_path + "conv3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, unit_path + "conv3_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, unit_path + "conv3_BatchNorm_gamma.npy"), + get_weights_accessor(data_path, unit_path + "conv3_BatchNorm_beta.npy"), 0.0000100099996416f) + .set_name(unit_name + "conv2/BatchNorm"); - if(i == 0) + if (i == 0) { SubStream left(graph); left << ConvolutionLayer( - 1U, 1U, base_depth * 4, - get_weights_accessor(data_path, unit_path + "shortcut_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "shortcut/convolution") + 1U, 1U, base_depth * 4, + get_weights_accessor(data_path, unit_path + "shortcut_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "shortcut/convolution") << BatchNormalizationLayer( - get_weights_accessor(data_path, unit_path + "shortcut_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "shortcut_BatchNorm_moving_variance.npy"), - get_weights_accessor(data_path, unit_path + "shortcut_BatchNorm_gamma.npy"), - get_weights_accessor(data_path, unit_path + "shortcut_BatchNorm_beta.npy"), - 0.0000100099996416f) - .set_name(unit_name + "shortcut/BatchNorm"); + get_weights_accessor(data_path, unit_path + "shortcut_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, unit_path + "shortcut_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, unit_path + "shortcut_BatchNorm_gamma.npy"), + get_weights_accessor(data_path, unit_path + "shortcut_BatchNorm_beta.npy"), + 0.0000100099996416f) + .set_name(unit_name + "shortcut/BatchNorm"); - graph << EltwiseLayer(std::move(left), std::move(right), EltwiseOperation::Add).set_name(unit_name + "add"); + graph << 
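For reference, each ResNet50 bottleneck unit pairs the right sub-stream above with one of three shortcut variants, depending on the unit's position in the block. (Note also that the activation after conv2 carries the label "conv1/Relu" on both sides of this diff; that copy-paste label predates this reformatting commit.) Condensed, with the layer arguments as in the hunk:

    SubStream left(graph);
    if (i == 0)
    {
        // First unit of a block: 1x1 projection to the new channel depth
        left << ConvolutionLayer(1U, 1U, base_depth * 4,
                                 get_weights_accessor(data_path, unit_path + "shortcut_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                                 PadStrideInfo(1, 1, 0, 0));
        // (followed by the shortcut BatchNormalizationLayer, as above)
    }
    else if (middle_stride > 1)
    {
        // Strided unit: downsample the identity with a 1x1 max pool
        left << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 1, common_params.data_layout,
                                              PadStrideInfo(middle_stride, middle_stride, 0, 0), true));
    }
    // Otherwise the identity passes through unchanged; in all three cases
    // the branches are merged by element-wise addition:
    graph << EltwiseLayer(std::move(left), std::move(right), EltwiseOperation::Add);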
EltwiseLayer(std::move(left), std::move(right), EltwiseOperation::Add) + .set_name(unit_name + "add"); } - else if(middle_stride > 1) + else if (middle_stride > 1) { SubStream left(graph); - left << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 1, common_params.data_layout, PadStrideInfo(middle_stride, middle_stride, 0, 0), true)).set_name(unit_name + "shortcut/MaxPool"); + left << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 1, common_params.data_layout, + PadStrideInfo(middle_stride, middle_stride, 0, 0), true)) + .set_name(unit_name + "shortcut/MaxPool"); - graph << EltwiseLayer(std::move(left), std::move(right), EltwiseOperation::Add).set_name(unit_name + "add"); + graph << EltwiseLayer(std::move(left), std::move(right), EltwiseOperation::Add) + .set_name(unit_name + "add"); } else { SubStream left(graph); - graph << EltwiseLayer(std::move(left), std::move(right), EltwiseOperation::Add).set_name(unit_name + "add"); + graph << EltwiseLayer(std::move(left), std::move(right), EltwiseOperation::Add) + .set_name(unit_name + "add"); } - graph << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Relu"); + graph << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Relu"); } } }; diff --git a/examples/graph_resnet_v2_50.cpp b/examples/graph_resnet_v2_50.cpp index 7d6b9aa3fd..48cf9b0b3c 100644 --- a/examples/graph_resnet_v2_50.cpp +++ b/examples/graph_resnet_v2_50.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. */ #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -35,8 +36,7 @@ using namespace arm_compute::graph_utils; class GraphResNetV2_50Example : public Example { public: - GraphResNetV2_50Example() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "ResNetV2_50") + GraphResNetV2_50Example() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "ResNetV2_50") { } bool do_setup(int argc, char **argv) override @@ -49,7 +49,7 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; @@ -61,64 +61,63 @@ public: // Get trainable parameters data path std::string data_path = common_params.data_path; std::string model_path = "/cnn_data/resnet_v2_50_model/"; - if(!data_path.empty()) + if (!data_path.empty()) { data_path += model_path; } // Create a preprocessor object - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<TFPreproccessor>(); + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<TFPreproccessor>(); // Create input descriptor const auto operation_layout = common_params.data_layout; - const TensorShape tensor_shape = permute_shape(TensorShape(224U, 224U, 3U, 1U), DataLayout::NCHW, operation_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(224U, 224U, 3U, common_params.batches), DataLayout::NCHW, operation_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); // Set weights trained layout const DataLayout 
weights_layout = DataLayout::NCHW; - graph << common_params.target - << common_params.fast_math_hint - << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), false /* Do not convert to BGR */)) - << ConvolutionLayer( - 7U, 7U, 64U, - get_weights_accessor(data_path, "conv1_weights.npy", weights_layout), - get_weights_accessor(data_path, "conv1_biases.npy", weights_layout), - PadStrideInfo(2, 2, 3, 3)) - .set_name("conv1/convolution") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR))).set_name("pool1/MaxPool"); + graph << common_params.target << common_params.fast_math_hint + << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), + false /* Do not convert to BGR */)) + << ConvolutionLayer(7U, 7U, 64U, get_weights_accessor(data_path, "conv1_weights.npy", weights_layout), + get_weights_accessor(data_path, "conv1_biases.npy", weights_layout), + PadStrideInfo(2, 2, 3, 3)) + .set_name("conv1/convolution") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, + PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR))) + .set_name("pool1/MaxPool"); add_residual_block(data_path, "block1", weights_layout, 64, 3, 2); add_residual_block(data_path, "block2", weights_layout, 128, 4, 2); add_residual_block(data_path, "block3", weights_layout, 256, 6, 2); add_residual_block(data_path, "block4", weights_layout, 512, 3, 1); - graph << BatchNormalizationLayer( - get_weights_accessor(data_path, "postnorm_moving_mean.npy"), - get_weights_accessor(data_path, "postnorm_moving_variance.npy"), - get_weights_accessor(data_path, "postnorm_gamma.npy"), - get_weights_accessor(data_path, "postnorm_beta.npy"), - 0.000009999999747378752f) - .set_name("postnorm/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("postnorm/Relu") + graph << BatchNormalizationLayer(get_weights_accessor(data_path, "postnorm_moving_mean.npy"), + get_weights_accessor(data_path, "postnorm_moving_variance.npy"), + get_weights_accessor(data_path, "postnorm_gamma.npy"), + get_weights_accessor(data_path, "postnorm_beta.npy"), 0.000009999999747378752f) + .set_name("postnorm/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("postnorm/Relu") << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, operation_layout)).set_name("pool5") - << ConvolutionLayer( - 1U, 1U, 1001U, - get_weights_accessor(data_path, "logits_weights.npy", weights_layout), - get_weights_accessor(data_path, "logits_biases.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("logits/convolution") - << FlattenLayer().set_name("predictions/Reshape") - << SoftmaxLayer().set_name("predictions/Softmax") + << ConvolutionLayer(1U, 1U, 1001U, get_weights_accessor(data_path, "logits_weights.npy", weights_layout), + get_weights_accessor(data_path, "logits_biases.npy"), PadStrideInfo(1, 1, 0, 0)) + .set_name("logits/convolution") + << FlattenLayer().set_name("predictions/Reshape") << SoftmaxLayer().set_name("predictions/Softmax") << OutputLayer(get_output_accessor(common_params, 5)); // Finalize graph GraphConfig config; - config.num_threads = common_params.threads; - config.use_tuner = common_params.enable_tuner; - config.tuner_mode = common_params.tuner_mode; - config.tuner_file = common_params.tuner_file; - config.convert_to_uint8 = (common_params.data_type == DataType::QASYMM8); + config.num_threads 
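Two functional differences from the v1 example are visible in this hunk: ResNet v2 finishes with a "postnorm" BatchNorm + ReLU before pooling, and its classifier has 1001 outputs rather than 1000 (reading the extra output as the TF-Slim checkpoint's background class is an assumption, not stated in this commit). The tail of the network therefore reduces to:

    graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, operation_layout)) // global average pool
          << ConvolutionLayer(1U, 1U, 1001U,                                    // 1000 classes + 1 extra output
                              get_weights_accessor(data_path, "logits_weights.npy", weights_layout),
                              get_weights_accessor(data_path, "logits_biases.npy"),
                              PadStrideInfo(1, 1, 0, 0))
          << FlattenLayer() << SoftmaxLayer()
          << OutputLayer(get_output_accessor(common_params, 5)); // report top-5 predictions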
= common_params.threads; + config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; + config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; + config.use_synthetic_type = arm_compute::is_data_type_quantized(common_params.data_type); + config.synthetic_type = common_params.data_type; graph.finalize(common_params.target, config); @@ -137,10 +136,14 @@ private: CommonGraphParams common_params; Stream graph; - void add_residual_block(const std::string &data_path, const std::string &name, DataLayout weights_layout, - unsigned int base_depth, unsigned int num_units, unsigned int stride) + void add_residual_block(const std::string &data_path, + const std::string &name, + DataLayout weights_layout, + unsigned int base_depth, + unsigned int num_units, + unsigned int stride) { - for(unsigned int i = 0; i < num_units; ++i) + for (unsigned int i = 0; i < num_units; ++i) { // Generate unit names std::stringstream unit_path_ss; @@ -152,7 +155,8 @@ private: std::string unit_name = unit_name_ss.str(); const TensorShape last_shape = graph.graph().node(graph.tail_node())->output(0)->desc().shape; - unsigned int depth_in = last_shape[arm_compute::get_data_layout_dimension_index(common_params.data_layout, DataLayoutDimension::CHANNEL)]; + unsigned int depth_in = last_shape[arm_compute::get_data_layout_dimension_index( + common_params.data_layout, DataLayoutDimension::CHANNEL)]; unsigned int depth_out = base_depth * 4; // All units have stride 1 apart from last one @@ -160,73 +164,76 @@ private: // Preact SubStream preact(graph); - preact << BatchNormalizationLayer( - get_weights_accessor(data_path, unit_path + "preact_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "preact_moving_variance.npy"), - get_weights_accessor(data_path, unit_path + "preact_gamma.npy"), - get_weights_accessor(data_path, unit_path + "preact_beta.npy"), - 0.000009999999747378752f) - .set_name(unit_name + "preact/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "preact/Relu"); + preact << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "preact_moving_mean.npy"), + get_weights_accessor(data_path, unit_path + "preact_moving_variance.npy"), + get_weights_accessor(data_path, unit_path + "preact_gamma.npy"), + get_weights_accessor(data_path, unit_path + "preact_beta.npy"), + 0.000009999999747378752f) + .set_name(unit_name + "preact/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "preact/Relu"); // Create bottleneck path SubStream shortcut(graph); - if(depth_in == depth_out) + if (depth_in == depth_out) { - if(middle_stride != 1) + if (middle_stride != 1) { - shortcut << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 1, common_params.data_layout, PadStrideInfo(middle_stride, middle_stride, 0, 0), true)).set_name(unit_name + "shortcut/MaxPool"); + shortcut << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 1, common_params.data_layout, + PadStrideInfo(middle_stride, middle_stride, 0, 0), true)) + .set_name(unit_name + "shortcut/MaxPool"); } } else { shortcut.forward_tail(preact.tail_node()); shortcut << ConvolutionLayer( - 1U, 1U, depth_out, - get_weights_accessor(data_path, unit_path + "shortcut_weights.npy", weights_layout), - get_weights_accessor(data_path, unit_path + "shortcut_biases.npy", weights_layout), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + 
"shortcut/convolution"); + 1U, 1U, depth_out, + get_weights_accessor(data_path, unit_path + "shortcut_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "shortcut_biases.npy", weights_layout), + PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "shortcut/convolution"); } // Create residual path SubStream residual(preact); - residual << ConvolutionLayer( - 1U, 1U, base_depth, - get_weights_accessor(data_path, unit_path + "conv1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "conv1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_moving_variance.npy"), - get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_gamma.npy"), - get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_beta.npy"), - 0.000009999999747378752f) - .set_name(unit_name + "conv1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "conv1/Relu") - << ConvolutionLayer( - 3U, 3U, base_depth, - get_weights_accessor(data_path, unit_path + "conv2_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(middle_stride, middle_stride, 1, 1)) - .set_name(unit_name + "conv2/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_moving_variance.npy"), - get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_gamma.npy"), - get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_beta.npy"), - 0.000009999999747378752f) - .set_name(unit_name + "conv2/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "conv1/Relu") - << ConvolutionLayer( - 1U, 1U, depth_out, - get_weights_accessor(data_path, unit_path + "conv3_weights.npy", weights_layout), - get_weights_accessor(data_path, unit_path + "conv3_biases.npy", weights_layout), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "conv3/convolution"); - - graph << EltwiseLayer(std::move(shortcut), std::move(residual), EltwiseOperation::Add).set_name(unit_name + "add"); + residual + << ConvolutionLayer(1U, 1U, base_depth, + get_weights_accessor(data_path, unit_path + "conv1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "conv1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_gamma.npy"), + get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_beta.npy"), + 0.000009999999747378752f) + .set_name(unit_name + "conv1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "conv1/Relu") + << ConvolutionLayer(3U, 3U, base_depth, + get_weights_accessor(data_path, unit_path + "conv2_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(middle_stride, middle_stride, 1, 1)) + .set_name(unit_name + "conv2/convolution") + << 
BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_gamma.npy"), + get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_beta.npy"), + 0.000009999999747378752f) + .set_name(unit_name + "conv2/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "conv1/Relu") + << ConvolutionLayer(1U, 1U, depth_out, + get_weights_accessor(data_path, unit_path + "conv3_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "conv3_biases.npy", weights_layout), + PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "conv3/convolution"); + + graph << EltwiseLayer(std::move(shortcut), std::move(residual), EltwiseOperation::Add) + .set_name(unit_name + "add"); } } }; diff --git a/examples/graph_resnext50.cpp b/examples/graph_resnext50.cpp index 2c50594b0c..12a1507c4c 100644 --- a/examples/graph_resnext50.cpp +++ b/examples/graph_resnext50.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. */ #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -35,8 +36,7 @@ using namespace arm_compute::graph_utils; class GraphResNeXt50Example : public Example { public: - GraphResNeXt50Example() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "ResNeXt50") + GraphResNeXt50Example() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "ResNeXt50") { } bool do_setup(int argc, char **argv) override @@ -49,14 +49,15 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; } // Checks - ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "QASYMM8 not supported for this graph"); + ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), + "QASYMM8 not supported for this graph"); // Print parameter values std::cout << common_params << std::endl; @@ -66,28 +67,33 @@ public: // Create input descriptor const auto operation_layout = common_params.data_layout; - const TensorShape tensor_shape = permute_shape(TensorShape(224U, 224U, 3U, 1U), DataLayout::NCHW, operation_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(224U, 224U, 3U, common_params.batches), DataLayout::NCHW, operation_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NCHW; - graph << common_params.target - << common_params.fast_math_hint + graph << common_params.target << common_params.fast_math_hint << InputLayer(input_descriptor, get_input_accessor(common_params)) << ScaleLayer(get_weights_accessor(data_path, "/cnn_data/resnext50_model/bn_data_mul.npy"), get_weights_accessor(data_path, "/cnn_data/resnext50_model/bn_data_add.npy")) - .set_name("bn_data/Scale") + .set_name("bn_data/Scale") << 
ConvolutionLayer( - 7U, 7U, 64U, - get_weights_accessor(data_path, "/cnn_data/resnext50_model/conv0_weights.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/resnext50_model/conv0_biases.npy"), - PadStrideInfo(2, 2, 2, 3, 2, 3, DimensionRoundingType::FLOOR)) - .set_name("conv0/Convolution") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv0/Relu") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR))).set_name("pool0"); - - add_residual_block(data_path, weights_layout, /*ofm*/ 256, /*stage*/ 1, /*num_unit*/ 3, /*stride_conv_unit1*/ 1); + 7U, 7U, 64U, + get_weights_accessor(data_path, "/cnn_data/resnext50_model/conv0_weights.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/resnext50_model/conv0_biases.npy"), + PadStrideInfo(2, 2, 2, 3, 2, 3, DimensionRoundingType::FLOOR)) + .set_name("conv0/Convolution") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv0/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, + PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR))) + .set_name("pool0"); + + add_residual_block(data_path, weights_layout, /*ofm*/ 256, /*stage*/ 1, /*num_unit*/ 3, + /*stride_conv_unit1*/ 1); add_residual_block(data_path, weights_layout, 512, 2, 4, 2); add_residual_block(data_path, weights_layout, 1024, 3, 6, 2); add_residual_block(data_path, weights_layout, 2048, 4, 3, 2); @@ -102,6 +108,7 @@ public: config.use_tuner = common_params.enable_tuner; config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; graph.finalize(common_params.target, config); @@ -120,10 +127,14 @@ private: CommonGraphParams common_params; Stream graph; - void add_residual_block(const std::string &data_path, DataLayout weights_layout, - unsigned int base_depth, unsigned int stage, unsigned int num_units, unsigned int stride_conv_unit1) + void add_residual_block(const std::string &data_path, + DataLayout weights_layout, + unsigned int base_depth, + unsigned int stage, + unsigned int num_units, + unsigned int stride_conv_unit1) { - for(unsigned int i = 0; i < num_units; ++i) + for (unsigned int i = 0; i < num_units; ++i) { std::stringstream unit_path_ss; unit_path_ss << "/cnn_data/resnext50_model/stage" << stage << "_unit" << (i + 1) << "_"; @@ -134,54 +145,55 @@ private: std::string unit_name = unit_name_ss.str(); PadStrideInfo pad_grouped_conv(1, 1, 1, 1); - if(i == 0) + if (i == 0) { - pad_grouped_conv = (stage == 1) ? PadStrideInfo(stride_conv_unit1, stride_conv_unit1, 1, 1) : PadStrideInfo(stride_conv_unit1, stride_conv_unit1, 0, 1, 0, 1, DimensionRoundingType::FLOOR); + pad_grouped_conv = (stage == 1) ? 
PadStrideInfo(stride_conv_unit1, stride_conv_unit1, 1, 1) + : PadStrideInfo(stride_conv_unit1, stride_conv_unit1, 0, 1, 0, 1, + DimensionRoundingType::FLOOR); } SubStream right(graph); - right << ConvolutionLayer( - 1U, 1U, base_depth / 2, - get_weights_accessor(data_path, unit_path + "conv1_weights.npy", weights_layout), - get_weights_accessor(data_path, unit_path + "conv1_biases.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "conv1/convolution") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "conv1/Relu") - - << ConvolutionLayer( - 3U, 3U, base_depth / 2, - get_weights_accessor(data_path, unit_path + "conv2_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - pad_grouped_conv, 32) - .set_name(unit_name + "conv2/convolution") + right << ConvolutionLayer(1U, 1U, base_depth / 2, + get_weights_accessor(data_path, unit_path + "conv1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "conv1_biases.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "conv1/convolution") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "conv1/Relu") + + << ConvolutionLayer(3U, 3U, base_depth / 2, + get_weights_accessor(data_path, unit_path + "conv2_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), pad_grouped_conv, + 32) + .set_name(unit_name + "conv2/convolution") << ScaleLayer(get_weights_accessor(data_path, unit_path + "bn2_mul.npy"), get_weights_accessor(data_path, unit_path + "bn2_add.npy")) - .set_name(unit_name + "conv1/Scale") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "conv2/Relu") + .set_name(unit_name + "conv1/Scale") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "conv2/Relu") - << ConvolutionLayer( - 1U, 1U, base_depth, - get_weights_accessor(data_path, unit_path + "conv3_weights.npy", weights_layout), - get_weights_accessor(data_path, unit_path + "conv3_biases.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "conv3/convolution"); + << ConvolutionLayer(1U, 1U, base_depth, + get_weights_accessor(data_path, unit_path + "conv3_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "conv3_biases.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "conv3/convolution"); SubStream left(graph); - if(i == 0) + if (i == 0) { - left << ConvolutionLayer( - 1U, 1U, base_depth, - get_weights_accessor(data_path, unit_path + "sc_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(stride_conv_unit1, stride_conv_unit1, 0, 0)) - .set_name(unit_name + "sc/convolution") + left << ConvolutionLayer(1U, 1U, base_depth, + get_weights_accessor(data_path, unit_path + "sc_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(stride_conv_unit1, stride_conv_unit1, 0, 0)) + .set_name(unit_name + "sc/convolution") << ScaleLayer(get_weights_accessor(data_path, unit_path + "sc_bn_mul.npy"), get_weights_accessor(data_path, unit_path + "sc_bn_add.npy")) - .set_name(unit_name + "sc/scale"); + .set_name(unit_name + "sc/scale"); } graph << EltwiseLayer(std::move(left), std::move(right), EltwiseOperation::Add).set_name(unit_name + "add"); - graph << 
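The distinguishing layer in this file is the cardinality-32 grouped convolution: the final positional argument of ConvolutionLayer sets num_groups, splitting the 3x3 convolution into 32 parallel paths as in the ResNeXt paper. From the hunk above, with a comment added:

    right << ConvolutionLayer(3U, 3U, base_depth / 2,
                              get_weights_accessor(data_path, unit_path + "conv2_weights.npy", weights_layout),
                              std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                              pad_grouped_conv,
                              32); // num_groups = 32: the "aggregated transformations" cardinality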
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Relu"); + graph << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Relu"); } } }; @@ -191,7 +203,7 @@ private: * Model is based on: * https://arxiv.org/abs/1611.05431 * "Aggregated Residual Transformations for Deep Neural Networks" - * Saining Xie, Ross Girshick, Piotr Dollar, Zhuowen Tu, Kaiming He + * Saining Xie, Ross Girshick, Piotr Dollar, Zhuowen Tu, Kaiming He. * * @note To list all the possible arguments execute the binary appended with the --help option * diff --git a/examples/graph_shufflenet.cpp b/examples/graph_shufflenet.cpp index 0b977982b5..513d95884e 100644 --- a/examples/graph_shufflenet.cpp +++ b/examples/graph_shufflenet.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. */ #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -35,8 +36,7 @@ using namespace arm_compute::graph_utils; class ShuffleNetExample : public Example { public: - ShuffleNetExample() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "ShuffleNet") + ShuffleNetExample() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "ShuffleNet") { } bool do_setup(int argc, char **argv) override @@ -49,20 +49,21 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; } // Set default layout if needed (Single kernel grouped convolution not yet supported int NHWC) - if(!common_opts.data_layout->is_set()) + if (!common_opts.data_layout->is_set()) { common_params.data_layout = DataLayout::NHWC; } // Checks - ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "QASYMM8 not supported for this graph"); + ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), + "QASYMM8 not supported for this graph"); // Print parameter values std::cout << common_params << std::endl; @@ -75,40 +76,40 @@ public: std::string data_path = common_params.data_path; // Add model path to data path - if(!data_path.empty()) + if (!data_path.empty()) { data_path += model_path; } // Create input descriptor const auto operation_layout = common_params.data_layout; - const TensorShape tensor_shape = permute_shape(TensorShape(224U, 224U, 3U, 1U), DataLayout::NCHW, operation_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(224U, 224U, 3U, common_params.batches), DataLayout::NCHW, operation_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NCHW; // Create preprocessor - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<TFPreproccessor>(0); + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<TFPreproccessor>(0); - graph << common_params.target - << common_params.fast_math_hint - << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), 
false /* Do not convert to BGR */)) - << ConvolutionLayer( - 3U, 3U, 24U, - get_weights_accessor(data_path, "conv3_0_w_0.npy", weights_layout), - get_weights_accessor(data_path, "conv3_0_b_0.npy", weights_layout), - PadStrideInfo(2, 2, 1, 1)) - .set_name("Conv1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "conv3_0_bn_rm_0.npy"), - get_weights_accessor(data_path, "conv3_0_bn_riv_0.npy"), - get_weights_accessor(data_path, "conv3_0_bn_s_0.npy"), - get_weights_accessor(data_path, "conv3_0_bn_b_0.npy"), - 1e-5f) - .set_name("Conv1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv1/Relu") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 1, 1))).set_name("pool1/MaxPool"); + graph << common_params.target << common_params.fast_math_hint + << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), + false /* Do not convert to BGR */)) + << ConvolutionLayer(3U, 3U, 24U, get_weights_accessor(data_path, "conv3_0_w_0.npy", weights_layout), + get_weights_accessor(data_path, "conv3_0_b_0.npy", weights_layout), + PadStrideInfo(2, 2, 1, 1)) + .set_name("Conv1/convolution") + << BatchNormalizationLayer(get_weights_accessor(data_path, "conv3_0_bn_rm_0.npy"), + get_weights_accessor(data_path, "conv3_0_bn_riv_0.npy"), + get_weights_accessor(data_path, "conv3_0_bn_s_0.npy"), + get_weights_accessor(data_path, "conv3_0_bn_b_0.npy"), 1e-5f) + .set_name("Conv1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv1/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 1, 1))) + .set_name("pool1/MaxPool"); // Stage 2 add_residual_block(data_path, DataLayout::NCHW, 0U /* unit */, 112U /* depth */, 2U /* stride */); @@ -134,13 +135,10 @@ public: graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, operation_layout)).set_name("predictions/AvgPool") << FlattenLayer().set_name("predictions/Reshape") - << FullyConnectedLayer( - 1000U, - get_weights_accessor(data_path, "pred_w_0.npy", weights_layout), - get_weights_accessor(data_path, "pred_b_0.npy")) - .set_name("predictions/FC") - << SoftmaxLayer().set_name("predictions/Softmax") - << OutputLayer(get_output_accessor(common_params, 5)); + << FullyConnectedLayer(1000U, get_weights_accessor(data_path, "pred_w_0.npy", weights_layout), + get_weights_accessor(data_path, "pred_b_0.npy")) + .set_name("predictions/FC") + << SoftmaxLayer().set_name("predictions/Softmax") << OutputLayer(get_output_accessor(common_params, 5)); // Finalize graph GraphConfig config; @@ -148,6 +146,7 @@ public: config.use_tuner = common_params.enable_tuner; config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; graph.finalize(common_params.target, config); @@ -166,8 +165,11 @@ private: CommonGraphParams common_params; Stream graph; - void add_residual_block(const std::string &data_path, DataLayout weights_layout, - unsigned int unit, unsigned int depth, unsigned int stride) + void add_residual_block(const std::string &data_path, + DataLayout weights_layout, + unsigned int unit, + unsigned int depth, + unsigned int stride) { PadStrideInfo dwc_info = PadStrideInfo(1, 1, 1, 1); const unsigned int gconv_id = unit * 2; @@ -180,63 +182,61 @@ private: SubStream left_ss(graph); SubStream right_ss(graph); - if(stride == 2) + if (stride == 2) { - 
right_ss << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, common_params.data_layout, PadStrideInfo(2, 2, 1, 1))).set_name(unit_name + "/pool_1/AveragePool"); + right_ss << PoolingLayer( + PoolingLayerInfo(PoolingType::AVG, 3, common_params.data_layout, PadStrideInfo(2, 2, 1, 1))) + .set_name(unit_name + "/pool_1/AveragePool"); dwc_info = PadStrideInfo(2, 2, 1, 1); } - left_ss << ConvolutionLayer( - 1U, 1U, depth, - get_weights_accessor(data_path, "gconv1_" + gconv_id_name + "_w_0.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0), num_groups) - .set_name(unit_name + "/gconv1_" + gconv_id_name + "/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "gconv1_" + gconv_id_name + "_bn_rm_0.npy"), - get_weights_accessor(data_path, "gconv1_" + gconv_id_name + "_bn_riv_0.npy"), - get_weights_accessor(data_path, "gconv1_" + gconv_id_name + "_bn_s_0.npy"), - get_weights_accessor(data_path, "gconv1_" + gconv_id_name + "_bn_b_0.npy"), - 1e-5f) - .set_name(unit_name + "/gconv1_" + gconv_id_name + "/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "/gconv1_" + gconv_id_name + "/Relu") - << ChannelShuffleLayer(num_groups).set_name(unit_name + "/shuffle_0/ChannelShufle") - << DepthwiseConvolutionLayer( - 3U, 3U, - get_weights_accessor(data_path, "gconv3_" + unit_id_name + "_w_0.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - dwc_info) - .set_name(unit_name + "/gconv3_" + unit_id_name + "/depthwise") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "gconv3_" + unit_id_name + "_bn_rm_0.npy"), - get_weights_accessor(data_path, "gconv3_" + unit_id_name + "_bn_riv_0.npy"), - get_weights_accessor(data_path, "gconv3_" + unit_id_name + "_bn_s_0.npy"), - get_weights_accessor(data_path, "gconv3_" + unit_id_name + "_bn_b_0.npy"), - 1e-5f) - .set_name(unit_name + "/gconv3_" + unit_id_name + "/BatchNorm") - << ConvolutionLayer( - 1U, 1U, depth, - get_weights_accessor(data_path, "gconv1_" + gconv_id_1_name + "_w_0.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0), num_groups) - .set_name(unit_name + "/gconv1_" + gconv_id_1_name + "/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "gconv1_" + gconv_id_1_name + "_bn_rm_0.npy"), - get_weights_accessor(data_path, "gconv1_" + gconv_id_1_name + "_bn_riv_0.npy"), - get_weights_accessor(data_path, "gconv1_" + gconv_id_1_name + "_bn_s_0.npy"), - get_weights_accessor(data_path, "gconv1_" + gconv_id_1_name + "_bn_b_0.npy"), - 1e-5f) - .set_name(unit_name + "/gconv1_" + gconv_id_1_name + "/BatchNorm"); + left_ss + << ConvolutionLayer(1U, 1U, depth, + get_weights_accessor(data_path, "gconv1_" + gconv_id_name + "_w_0.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(1, 1, 0, 0), num_groups) + .set_name(unit_name + "/gconv1_" + gconv_id_name + "/convolution") + << BatchNormalizationLayer(get_weights_accessor(data_path, "gconv1_" + gconv_id_name + "_bn_rm_0.npy"), + get_weights_accessor(data_path, "gconv1_" + gconv_id_name + "_bn_riv_0.npy"), + get_weights_accessor(data_path, "gconv1_" + gconv_id_name + "_bn_s_0.npy"), + get_weights_accessor(data_path, "gconv1_" + gconv_id_name + "_bn_b_0.npy"), + 1e-5f) + .set_name(unit_name + "/gconv1_" + gconv_id_name + "/BatchNorm") + << 
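What makes a ShuffleNet unit work is the interleaving step between its two grouped 1x1 convolutions: grouped convolutions keep channels partitioned, so ChannelShuffleLayer(num_groups) permutes them to let information cross groups before the depthwise 3x3. Stripping the BatchNorm/ReLU stages from the left_ss chain here for clarity:

    left_ss << ConvolutionLayer(1U, 1U, depth,
                                get_weights_accessor(data_path, "gconv1_" + gconv_id_name + "_w_0.npy", weights_layout),
                                std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                                PadStrideInfo(1, 1, 0, 0), num_groups) // grouped 1x1: channels stay partitioned
            << ChannelShuffleLayer(num_groups)                         // interleave channels across groups
            << DepthwiseConvolutionLayer(3U, 3U,
                                get_weights_accessor(data_path, "gconv3_" + unit_id_name + "_w_0.npy", weights_layout),
                                std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                                dwc_info);                             // strided when the unit downsamples

Strided units then concatenate this result with the average-pooled identity, growing the channel count, while stride-1 units use element-wise addition, as the code just below shows.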
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "/gconv1_" + gconv_id_name + "/Relu") + << ChannelShuffleLayer(num_groups).set_name(unit_name + "/shuffle_0/ChannelShufle") + << DepthwiseConvolutionLayer( + 3U, 3U, get_weights_accessor(data_path, "gconv3_" + unit_id_name + "_w_0.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), dwc_info) + .set_name(unit_name + "/gconv3_" + unit_id_name + "/depthwise") + << BatchNormalizationLayer(get_weights_accessor(data_path, "gconv3_" + unit_id_name + "_bn_rm_0.npy"), + get_weights_accessor(data_path, "gconv3_" + unit_id_name + "_bn_riv_0.npy"), + get_weights_accessor(data_path, "gconv3_" + unit_id_name + "_bn_s_0.npy"), + get_weights_accessor(data_path, "gconv3_" + unit_id_name + "_bn_b_0.npy"), 1e-5f) + .set_name(unit_name + "/gconv3_" + unit_id_name + "/BatchNorm") + << ConvolutionLayer( + 1U, 1U, depth, + get_weights_accessor(data_path, "gconv1_" + gconv_id_1_name + "_w_0.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0), num_groups) + .set_name(unit_name + "/gconv1_" + gconv_id_1_name + "/convolution") + << BatchNormalizationLayer(get_weights_accessor(data_path, "gconv1_" + gconv_id_1_name + "_bn_rm_0.npy"), + get_weights_accessor(data_path, "gconv1_" + gconv_id_1_name + "_bn_riv_0.npy"), + get_weights_accessor(data_path, "gconv1_" + gconv_id_1_name + "_bn_s_0.npy"), + get_weights_accessor(data_path, "gconv1_" + gconv_id_1_name + "_bn_b_0.npy"), + 1e-5f) + .set_name(unit_name + "/gconv1_" + gconv_id_1_name + "/BatchNorm"); - if(stride == 2) + if (stride == 2) { graph << ConcatLayer(std::move(left_ss), std::move(right_ss)).set_name(unit_name + "/Concat"); } else { - graph << EltwiseLayer(std::move(left_ss), std::move(right_ss), EltwiseOperation::Add).set_name(unit_name + "/Add"); + graph << EltwiseLayer(std::move(left_ss), std::move(right_ss), EltwiseOperation::Add) + .set_name(unit_name + "/Add"); } - graph << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "/Relu"); + graph << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "/Relu"); } }; diff --git a/examples/graph_squeezenet.cpp b/examples/graph_squeezenet.cpp index 35fceb4e98..7d0528f805 100644 --- a/examples/graph_squeezenet.cpp +++ b/examples/graph_squeezenet.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. 
*/ #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -35,8 +36,7 @@ using namespace arm_compute::graph_utils; class GraphSqueezenetExample : public Example { public: - GraphSqueezenetExample() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "SqueezeNetV1") + GraphSqueezenetExample() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "SqueezeNetV1") { } bool do_setup(int argc, char **argv) override @@ -49,7 +49,7 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; @@ -62,113 +62,139 @@ public: std::string data_path = common_params.data_path; // Create a preprocessor object - const std::array<float, 3> mean_rgb{ { 122.68f, 116.67f, 104.01f } }; - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb); + const std::array<float, 3> mean_rgb{{122.68f, 116.67f, 104.01f}}; + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<CaffePreproccessor>(mean_rgb); // Create input descriptor const auto operation_layout = common_params.data_layout; - const TensorShape tensor_shape = permute_shape(TensorShape(224U, 224U, 3U, 1U), DataLayout::NCHW, operation_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(224U, 224U, 3U, common_params.batches), DataLayout::NCHW, operation_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NCHW; - graph << common_params.target - << common_params.fast_math_hint + graph << common_params.target << common_params.fast_math_hint << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor))) << ConvolutionLayer( - 7U, 7U, 96U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv1_b.npy"), - PadStrideInfo(2, 2, 0, 0)) - .set_name("conv1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu_conv1") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("pool1") + 7U, 7U, 96U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv1_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv1_b.npy"), + PadStrideInfo(2, 2, 0, 0)) + .set_name("conv1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("relu_conv1") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + .set_name("pool1") << ConvolutionLayer( - 1U, 1U, 16U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire2_squeeze1x1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire2_squeeze1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("fire2/squeeze1x1") - << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("fire2/relu_squeeze1x1"); + 1U, 1U, 16U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire2_squeeze1x1_w.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire2_squeeze1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("fire2/squeeze1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fire2/relu_squeeze1x1"); graph << get_expand_fire_node(data_path, "fire2", weights_layout, 64U, 64U).set_name("fire2/concat"); graph << ConvolutionLayer( - 1U, 1U, 16U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire3_squeeze1x1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire3_squeeze1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("fire3/squeeze1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("fire3/relu_squeeze1x1"); + 1U, 1U, 16U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire3_squeeze1x1_w.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire3_squeeze1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("fire3/squeeze1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fire3/relu_squeeze1x1"); graph << get_expand_fire_node(data_path, "fire3", weights_layout, 64U, 64U).set_name("fire3/concat"); graph << ConvolutionLayer( - 1U, 1U, 32U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire4_squeeze1x1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire4_squeeze1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("fire4/squeeze1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("fire4/relu_squeeze1x1"); + 1U, 1U, 32U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire4_squeeze1x1_w.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire4_squeeze1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("fire4/squeeze1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fire4/relu_squeeze1x1"); graph << get_expand_fire_node(data_path, "fire4", weights_layout, 128U, 128U).set_name("fire4/concat"); - graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("pool4") + graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + .set_name("pool4") << ConvolutionLayer( - 1U, 1U, 32U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire5_squeeze1x1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire5_squeeze1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("fire5/squeeze1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("fire5/relu_squeeze1x1"); + 1U, 1U, 32U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire5_squeeze1x1_w.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire5_squeeze1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("fire5/squeeze1x1") + << 
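Each fire module repeats the same squeeze-then-expand cadence with growing widths. Taking fire2 as a worked example: the 96-channel stem is squeezed to 16 channels by a 1x1 convolution, then get_expand_fire_node widens it again to 64 + 64 = 128 concatenated channels:

    graph << ConvolutionLayer(1U, 1U, 16U, // squeeze: 96 -> 16 channels
                              get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire2_squeeze1x1_w.npy",
                                                   weights_layout),
                              get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire2_squeeze1x1_b.npy"),
                              PadStrideInfo(1, 1, 0, 0))
          << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
    graph << get_expand_fire_node(data_path, "fire2", weights_layout, 64U, 64U); // expand: 64 + 64 = 128 channels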
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fire5/relu_squeeze1x1"); graph << get_expand_fire_node(data_path, "fire5", weights_layout, 128U, 128U).set_name("fire5/concat"); graph << ConvolutionLayer( - 1U, 1U, 48U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire6_squeeze1x1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire6_squeeze1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("fire6/squeeze1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("fire6/relu_squeeze1x1"); + 1U, 1U, 48U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire6_squeeze1x1_w.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire6_squeeze1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("fire6/squeeze1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fire6/relu_squeeze1x1"); graph << get_expand_fire_node(data_path, "fire6", weights_layout, 192U, 192U).set_name("fire6/concat"); graph << ConvolutionLayer( - 1U, 1U, 48U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire7_squeeze1x1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire7_squeeze1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("fire7/squeeze1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("fire7/relu_squeeze1x1"); + 1U, 1U, 48U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire7_squeeze1x1_w.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire7_squeeze1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("fire7/squeeze1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fire7/relu_squeeze1x1"); graph << get_expand_fire_node(data_path, "fire7", weights_layout, 192U, 192U).set_name("fire7/concat"); graph << ConvolutionLayer( - 1U, 1U, 64U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire8_squeeze1x1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire8_squeeze1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("fire8/squeeze1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("fire8/relu_squeeze1x1"); + 1U, 1U, 64U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire8_squeeze1x1_w.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire8_squeeze1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("fire8/squeeze1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fire8/relu_squeeze1x1"); graph << get_expand_fire_node(data_path, "fire8", weights_layout, 256U, 256U).set_name("fire8/concat"); - graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("pool8") + graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + .set_name("pool8") << ConvolutionLayer( - 1U, 1U, 64U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire9_squeeze1x1_w.npy", weights_layout), - get_weights_accessor(data_path, 
"/cnn_data/squeezenet_v1.0_model/fire9_squeeze1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("fire9/squeeze1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("fire9/relu_squeeze1x1"); + 1U, 1U, 64U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire9_squeeze1x1_w.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire9_squeeze1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("fire9/squeeze1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fire9/relu_squeeze1x1"); graph << get_expand_fire_node(data_path, "fire9", weights_layout, 256U, 256U).set_name("fire9/concat"); graph << ConvolutionLayer( - 1U, 1U, 1000U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv10_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv10_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv10") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu_conv10") + 1U, 1U, 1000U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv10_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv10_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("conv10") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("relu_conv10") << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, operation_layout)).set_name("pool10") - << FlattenLayer().set_name("flatten") - << SoftmaxLayer().set_name("prob") + << FlattenLayer().set_name("flatten") << SoftmaxLayer().set_name("prob") << OutputLayer(get_output_accessor(common_params, 5)); // Finalize graph GraphConfig config; - config.num_threads = common_params.threads; - config.use_tuner = common_params.enable_tuner; - config.tuner_mode = common_params.tuner_mode; - config.tuner_file = common_params.tuner_file; - config.convert_to_uint8 = (common_params.data_type == DataType::QASYMM8); + config.num_threads = common_params.threads; + config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; + config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; + config.use_synthetic_type = arm_compute::is_data_type_quantized(common_params.data_type); + config.synthetic_type = common_params.data_type; graph.finalize(common_params.target, config); @@ -186,27 +212,30 @@ private: CommonGraphParams common_params; Stream graph; - ConcatLayer get_expand_fire_node(const std::string &data_path, std::string &¶m_path, DataLayout weights_layout, - unsigned int expand1_filt, unsigned int expand3_filt) + ConcatLayer get_expand_fire_node(const std::string &data_path, + std::string &¶m_path, + DataLayout weights_layout, + unsigned int expand1_filt, + unsigned int expand3_filt) { std::string total_path = "/cnn_data/squeezenet_v1.0_model/" + param_path + "_"; SubStream i_a(graph); - i_a << ConvolutionLayer( - 1U, 1U, expand1_filt, - get_weights_accessor(data_path, total_path + "expand1x1_w.npy", weights_layout), - get_weights_accessor(data_path, total_path + "expand1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/expand1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/relu_expand1x1"); + i_a << ConvolutionLayer(1U, 1U, expand1_filt, + get_weights_accessor(data_path, 
total_path + "expand1x1_w.npy", weights_layout), + get_weights_accessor(data_path, total_path + "expand1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/expand1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/relu_expand1x1"); SubStream i_b(graph); - i_b << ConvolutionLayer( - 3U, 3U, expand3_filt, - get_weights_accessor(data_path, total_path + "expand3x3_w.npy", weights_layout), - get_weights_accessor(data_path, total_path + "expand3x3_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name(param_path + "/expand3x3") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/relu_expand3x3"); + i_b << ConvolutionLayer(3U, 3U, expand3_filt, + get_weights_accessor(data_path, total_path + "expand3x3_w.npy", weights_layout), + get_weights_accessor(data_path, total_path + "expand3x3_b.npy"), + PadStrideInfo(1, 1, 1, 1)) + .set_name(param_path + "/expand3x3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/relu_expand3x3"); return ConcatLayer(std::move(i_a), std::move(i_b)); } diff --git a/examples/graph_squeezenet_v1_1.cpp b/examples/graph_squeezenet_v1_1.cpp index f648b6337d..ed0f692db2 100644 --- a/examples/graph_squeezenet_v1_1.cpp +++ b/examples/graph_squeezenet_v1_1.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. */ #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -35,8 +36,7 @@ using namespace arm_compute::graph_utils; class GraphSqueezenet_v1_1Example : public Example { public: - GraphSqueezenet_v1_1Example() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "SqueezeNetV1.1") + GraphSqueezenet_v1_1Example() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "SqueezeNetV1.1") { } bool do_setup(int argc, char **argv) override @@ -49,7 +49,7 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; @@ -62,113 +62,139 @@ public: std::string data_path = common_params.data_path; // Create a preprocessor object - const std::array<float, 3> mean_rgb{ { 122.68f, 116.67f, 104.01f } }; - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb); + const std::array<float, 3> mean_rgb{{122.68f, 116.67f, 104.01f}}; + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<CaffePreproccessor>(mean_rgb); // Create input descriptor const auto operation_layout = common_params.data_layout; - const TensorShape tensor_shape = permute_shape(TensorShape(227U, 227U, 3U, 1U), DataLayout::NCHW, operation_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(227U, 227U, 3U, common_params.batches), DataLayout::NCHW, operation_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NCHW; - graph << common_params.target - << common_params.fast_math_hint + graph 
<< common_params.target << common_params.fast_math_hint << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor))) << ConvolutionLayer( - 3U, 3U, 64U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv1_b.npy"), - PadStrideInfo(2, 2, 0, 0)) - .set_name("conv1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu_conv1") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("pool1") + 3U, 3U, 64U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv1_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv1_b.npy"), + PadStrideInfo(2, 2, 0, 0)) + .set_name("conv1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("relu_conv1") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + .set_name("pool1") << ConvolutionLayer( - 1U, 1U, 16U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire2_squeeze1x1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire2_squeeze1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("fire2/squeeze1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("fire2/relu_squeeze1x1"); + 1U, 1U, 16U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire2_squeeze1x1_w.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire2_squeeze1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("fire2/squeeze1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fire2/relu_squeeze1x1"); graph << get_expand_fire_node(data_path, "fire2", weights_layout, 64U, 64U).set_name("fire2/concat"); graph << ConvolutionLayer( - 1U, 1U, 16U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire3_squeeze1x1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire3_squeeze1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("fire3/squeeze1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("fire3/relu_squeeze1x1"); + 1U, 1U, 16U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire3_squeeze1x1_w.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire3_squeeze1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("fire3/squeeze1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fire3/relu_squeeze1x1"); graph << get_expand_fire_node(data_path, "fire3", weights_layout, 64U, 64U).set_name("fire3/concat"); - graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("pool3") + graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + .set_name("pool3") << ConvolutionLayer( - 1U, 1U, 32U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire4_squeeze1x1_w.npy", weights_layout), - get_weights_accessor(data_path, 
"/cnn_data/squeezenet_v1_1_model/fire4_squeeze1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("fire4/squeeze1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("fire4/relu_squeeze1x1"); + 1U, 1U, 32U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire4_squeeze1x1_w.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire4_squeeze1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("fire4/squeeze1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fire4/relu_squeeze1x1"); graph << get_expand_fire_node(data_path, "fire4", weights_layout, 128U, 128U).set_name("fire4/concat"); graph << ConvolutionLayer( - 1U, 1U, 32U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire5_squeeze1x1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire5_squeeze1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("fire5/squeeze1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("fire5/relu_squeeze1x1"); + 1U, 1U, 32U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire5_squeeze1x1_w.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire5_squeeze1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("fire5/squeeze1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fire5/relu_squeeze1x1"); graph << get_expand_fire_node(data_path, "fire5", weights_layout, 128U, 128U).set_name("fire5/concat"); - graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("pool5") + graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + .set_name("pool5") << ConvolutionLayer( - 1U, 1U, 48U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire6_squeeze1x1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire6_squeeze1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("fire6/squeeze1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("fire6/relu_squeeze1x1"); + 1U, 1U, 48U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire6_squeeze1x1_w.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire6_squeeze1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("fire6/squeeze1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fire6/relu_squeeze1x1"); graph << get_expand_fire_node(data_path, "fire6", weights_layout, 192U, 192U).set_name("fire6/concat"); graph << ConvolutionLayer( - 1U, 1U, 48U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire7_squeeze1x1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire7_squeeze1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("fire7/squeeze1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("fire7/relu_squeeze1x1"); + 1U, 1U, 48U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire7_squeeze1x1_w.npy", + weights_layout), + get_weights_accessor(data_path, 
"/cnn_data/squeezenet_v1_1_model/fire7_squeeze1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("fire7/squeeze1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fire7/relu_squeeze1x1"); graph << get_expand_fire_node(data_path, "fire7", weights_layout, 192U, 192U).set_name("fire7/concat"); graph << ConvolutionLayer( - 1U, 1U, 64U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire8_squeeze1x1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire8_squeeze1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("fire8/squeeze1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("fire8/relu_squeeze1x1"); + 1U, 1U, 64U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire8_squeeze1x1_w.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire8_squeeze1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("fire8/squeeze1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fire8/relu_squeeze1x1"); graph << get_expand_fire_node(data_path, "fire8", weights_layout, 256U, 256U).set_name("fire8/concat"); graph << ConvolutionLayer( - 1U, 1U, 64U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire9_squeeze1x1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire9_squeeze1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("fire9/squeeze1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("fire9/relu_squeeze1x1"); + 1U, 1U, 64U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire9_squeeze1x1_w.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire9_squeeze1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("fire9/squeeze1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fire9/relu_squeeze1x1"); graph << get_expand_fire_node(data_path, "fire9", weights_layout, 256U, 256U).set_name("fire9/concat"); graph << ConvolutionLayer( - 1U, 1U, 1000U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv10_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv10_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv10") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu_conv10") + 1U, 1U, 1000U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv10_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv10_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("conv10") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("relu_conv10") << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, operation_layout)).set_name("pool10") - << FlattenLayer().set_name("flatten") - << SoftmaxLayer().set_name("prob") + << FlattenLayer().set_name("flatten") << SoftmaxLayer().set_name("prob") << OutputLayer(get_output_accessor(common_params, 5)); // Finalize graph GraphConfig config; - config.num_threads = common_params.threads; - config.use_tuner = common_params.enable_tuner; - config.tuner_mode = common_params.tuner_mode; - config.tuner_file = common_params.tuner_file; - config.convert_to_uint8 = 
(common_params.data_type == DataType::QASYMM8); + config.num_threads = common_params.threads; + config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; + config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; + config.use_synthetic_type = arm_compute::is_data_type_quantized(common_params.data_type); + config.synthetic_type = common_params.data_type; graph.finalize(common_params.target, config); @@ -186,27 +212,30 @@ private: CommonGraphParams common_params; Stream graph; - ConcatLayer get_expand_fire_node(const std::string &data_path, std::string &¶m_path, DataLayout weights_layout, - unsigned int expand1_filt, unsigned int expand3_filt) + ConcatLayer get_expand_fire_node(const std::string &data_path, + std::string &¶m_path, + DataLayout weights_layout, + unsigned int expand1_filt, + unsigned int expand3_filt) { std::string total_path = "/cnn_data/squeezenet_v1_1_model/" + param_path + "_"; SubStream i_a(graph); - i_a << ConvolutionLayer( - 1U, 1U, expand1_filt, - get_weights_accessor(data_path, total_path + "expand1x1_w.npy", weights_layout), - get_weights_accessor(data_path, total_path + "expand1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/expand1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/relu_expand1x1"); + i_a << ConvolutionLayer(1U, 1U, expand1_filt, + get_weights_accessor(data_path, total_path + "expand1x1_w.npy", weights_layout), + get_weights_accessor(data_path, total_path + "expand1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/expand1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/relu_expand1x1"); SubStream i_b(graph); - i_b << ConvolutionLayer( - 3U, 3U, expand3_filt, - get_weights_accessor(data_path, total_path + "expand3x3_w.npy", weights_layout), - get_weights_accessor(data_path, total_path + "expand3x3_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name(param_path + "/expand3x3") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/relu_expand3x3"); + i_b << ConvolutionLayer(3U, 3U, expand3_filt, + get_weights_accessor(data_path, total_path + "expand3x3_w.npy", weights_layout), + get_weights_accessor(data_path, total_path + "expand3x3_b.npy"), + PadStrideInfo(1, 1, 1, 1)) + .set_name(param_path + "/expand3x3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/relu_expand3x3"); return ConcatLayer(std::move(i_a), std::move(i_b)); } diff --git a/examples/graph_srcnn955.cpp b/examples/graph_srcnn955.cpp index 18921065d7..15a8b5d8ec 100644 --- a/examples/graph_srcnn955.cpp +++ b/examples/graph_srcnn955.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. 
*/ #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -36,7 +37,12 @@ class GraphSRCNN955Example : public Example { public: GraphSRCNN955Example() - : cmd_parser(), common_opts(cmd_parser), model_input_width(nullptr), model_input_height(nullptr), common_params(), graph(0, "SRCNN955") + : cmd_parser(), + common_opts(cmd_parser), + model_input_width(nullptr), + model_input_height(nullptr), + common_params(), + graph(0, "SRCNN955") { model_input_width = cmd_parser.add_option<SimpleOption<unsigned int>>("image-width", 300); model_input_height = cmd_parser.add_option<SimpleOption<unsigned int>>("image-height", 300); @@ -45,7 +51,7 @@ public: model_input_width->set_help("Input image width."); model_input_height->set_help("Input image height."); } - GraphSRCNN955Example(const GraphSRCNN955Example &) = delete; + GraphSRCNN955Example(const GraphSRCNN955Example &) = delete; GraphSRCNN955Example &operator=(const GraphSRCNN955Example &) = delete; ~GraphSRCNN955Example() override = default; bool do_setup(int argc, char **argv) override @@ -58,7 +64,7 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; @@ -78,48 +84,47 @@ public: const std::string model_path = "/cnn_data/srcnn955_model/"; // Create a preprocessor object - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<TFPreproccessor>(); + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<TFPreproccessor>(); // Create input descriptor - const TensorShape tensor_shape = permute_shape(TensorShape(image_width, image_height, 3U, 1U), DataLayout::NCHW, common_params.data_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(image_width, image_height, 3U, common_params.batches), DataLayout::NCHW, + common_params.data_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NCHW; - graph << common_params.target - << common_params.fast_math_hint - << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), false /* Do not convert to BGR */)) - << ConvolutionLayer( - 9U, 9U, 64U, - get_weights_accessor(data_path, "conv1_weights.npy", weights_layout), - get_weights_accessor(data_path, "conv1_biases.npy"), - PadStrideInfo(1, 1, 4, 4)) - .set_name("conv1/convolution") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1/Relu") - << ConvolutionLayer( - 5U, 5U, 32U, - get_weights_accessor(data_path, "conv2_weights.npy", weights_layout), - get_weights_accessor(data_path, "conv2_biases.npy"), - PadStrideInfo(1, 1, 2, 2)) - .set_name("conv2/convolution") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv2/Relu") - << ConvolutionLayer( - 5U, 5U, 3U, - get_weights_accessor(data_path, "conv3_weights.npy", weights_layout), - get_weights_accessor(data_path, "conv3_biases.npy"), - PadStrideInfo(1, 1, 2, 2)) - .set_name("conv3/convolution") - << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv3/Relu") - << OutputLayer(arm_compute::support::cpp14::make_unique<DummyAccessor>(0)); + graph << common_params.target << common_params.fast_math_hint + << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), + false /* Do not convert to BGR */)) + << ConvolutionLayer(9U, 9U, 64U, get_weights_accessor(data_path, "conv1_weights.npy", weights_layout), + get_weights_accessor(data_path, "conv1_biases.npy"), PadStrideInfo(1, 1, 4, 4)) + .set_name("conv1/convolution") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv1/Relu") + << ConvolutionLayer(5U, 5U, 32U, get_weights_accessor(data_path, "conv2_weights.npy", weights_layout), + get_weights_accessor(data_path, "conv2_biases.npy"), PadStrideInfo(1, 1, 2, 2)) + .set_name("conv2/convolution") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv2/Relu") + << ConvolutionLayer(5U, 5U, 3U, get_weights_accessor(data_path, "conv3_weights.npy", weights_layout), + get_weights_accessor(data_path, "conv3_biases.npy"), PadStrideInfo(1, 1, 2, 2)) + .set_name("conv3/convolution") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv3/Relu") + << OutputLayer(std::make_unique<DummyAccessor>(0)); // Finalize graph GraphConfig config; - config.num_threads = common_params.threads; - config.use_tuner = common_params.enable_tuner; - config.tuner_mode = common_params.tuner_mode; - config.tuner_file = common_params.tuner_file; - config.convert_to_uint8 = (common_params.data_type == DataType::QASYMM8); + config.num_threads = common_params.threads; + config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; + config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; + config.use_synthetic_type = arm_compute::is_data_type_quantized(common_params.data_type); + config.synthetic_type = common_params.data_type; graph.finalize(common_params.target, config); @@ -135,8 +140,8 @@ public: private: CommandLineParser cmd_parser; CommonGraphOptions common_opts; - SimpleOption<unsigned int> *model_input_width{ nullptr }; - SimpleOption<unsigned int> *model_input_height{ nullptr }; + SimpleOption<unsigned int> *model_input_width{nullptr}; + SimpleOption<unsigned int> *model_input_height{nullptr}; CommonGraphParams common_params; Stream graph; }; diff --git a/examples/graph_ssd_mobilenet.cpp b/examples/graph_ssd_mobilenet.cpp index f2a8b30bb2..6218d47dd6 100644 --- a/examples/graph_ssd_mobilenet.cpp +++ b/examples/graph_ssd_mobilenet.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,7 +21,9 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ +#include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -36,23 +38,26 @@ using namespace arm_compute::graph_utils; class GraphSSDMobilenetExample : public Example { public: - GraphSSDMobilenetExample() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "MobileNetSSD") + GraphSSDMobilenetExample() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "MobileNetSSD") { // Add topk option keep_topk_opt = cmd_parser.add_option<SimpleOption<int>>("topk", 100); keep_topk_opt->set_help("Top k detections results per image. Used for data type F32."); // Add output option detection_boxes_opt = cmd_parser.add_option<SimpleOption<std::string>>("detection_boxes_opt", ""); - detection_boxes_opt->set_help("Filename containing the reference values for the graph output detection_boxes. Used for data type QASYMM8."); + detection_boxes_opt->set_help("Filename containing the reference values for the graph output detection_boxes. " + "Used for data type QASYMM8."); detection_classes_opt = cmd_parser.add_option<SimpleOption<std::string>>("detection_classes_opt", ""); - detection_classes_opt->set_help("Filename containing the reference values for the output detection_classes. Used for data type QASYMM8."); + detection_classes_opt->set_help( + "Filename containing the reference values for the output detection_classes. Used for data type QASYMM8."); detection_scores_opt = cmd_parser.add_option<SimpleOption<std::string>>("detection_scores_opt", ""); - detection_scores_opt->set_help("Filename containing the reference values for the output detection_scores. Used for data type QASYMM8."); + detection_scores_opt->set_help( + "Filename containing the reference values for the output detection_scores. Used for data type QASYMM8."); num_detections_opt = cmd_parser.add_option<SimpleOption<std::string>>("num_detections_opt", ""); - num_detections_opt->set_help("Filename containing the reference values for the output num_detections. Used with datatype QASYMM8."); + num_detections_opt->set_help( + "Filename containing the reference values for the output num_detections. 
Used with datatype QASYMM8."); } - GraphSSDMobilenetExample(const GraphSSDMobilenetExample &) = delete; + GraphSSDMobilenetExample(const GraphSSDMobilenetExample &) = delete; GraphSSDMobilenetExample &operator=(const GraphSSDMobilenetExample &) = delete; ~GraphSSDMobilenetExample() override = default; bool do_setup(int argc, char **argv) override @@ -65,7 +70,7 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; @@ -75,15 +80,16 @@ public: std::cout << common_params << std::endl; // Create input descriptor - const TensorShape tensor_shape = permute_shape(TensorShape(300, 300, 3U, 1U), DataLayout::NCHW, common_params.data_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(300, 300, 3U, 1U), DataLayout::NCHW, common_params.data_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); // Set graph hints - graph << common_params.target - << common_params.fast_math_hint; + graph << common_params.target << common_params.fast_math_hint; // Create core graph - if(arm_compute::is_data_type_float(common_params.data_type)) + if (arm_compute::is_data_type_float(common_params.data_type)) { create_graph_float(input_descriptor); } @@ -97,6 +103,7 @@ public: config.num_threads = common_params.threads; config.use_tuner = common_params.enable_tuner; config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; graph.finalize(common_params.target, config); @@ -111,99 +118,98 @@ public: private: CommandLineParser cmd_parser; CommonGraphOptions common_opts; - SimpleOption<int> *keep_topk_opt{ nullptr }; + SimpleOption<int> *keep_topk_opt{nullptr}; CommonGraphParams common_params; Stream graph; - SimpleOption<std::string> *detection_boxes_opt{ nullptr }; - SimpleOption<std::string> *detection_classes_opt{ nullptr }; - SimpleOption<std::string> *detection_scores_opt{ nullptr }; - SimpleOption<std::string> *num_detections_opt{ nullptr }; - - ConcatLayer get_node_A_float(IStream &master_graph, const std::string &data_path, std::string &¶m_path, - unsigned int conv_filt, - PadStrideInfo dwc_pad_stride_info, PadStrideInfo conv_pad_stride_info) + SimpleOption<std::string> *detection_boxes_opt{nullptr}; + SimpleOption<std::string> *detection_classes_opt{nullptr}; + SimpleOption<std::string> *detection_scores_opt{nullptr}; + SimpleOption<std::string> *num_detections_opt{nullptr}; + + ConcatLayer get_node_A_float(IStream &main_graph, + const std::string &data_path, + std::string &¶m_path, + unsigned int conv_filt, + PadStrideInfo dwc_pad_stride_info, + PadStrideInfo conv_pad_stride_info) { const std::string total_path = param_path + "_"; - SubStream sg(master_graph); - - sg << DepthwiseConvolutionLayer( - 3U, 3U, - get_weights_accessor(data_path, total_path + "dw_w.npy"), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - dwc_pad_stride_info) - .set_name(param_path + "/dw") + SubStream sg(main_graph); + + sg << DepthwiseConvolutionLayer(3U, 3U, get_weights_accessor(data_path, total_path + "dw_w.npy"), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + dwc_pad_stride_info) + .set_name(param_path + "/dw") << BatchNormalizationLayer(get_weights_accessor(data_path, 
total_path + "dw_bn_mean.npy"), get_weights_accessor(data_path, total_path + "dw_bn_var.npy"), get_weights_accessor(data_path, total_path + "dw_scale_w.npy"), get_weights_accessor(data_path, total_path + "dw_scale_b.npy"), 0.00001f) - .set_name(param_path + "/dw/bn") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "dw/relu") - - << ConvolutionLayer( - 1U, 1U, conv_filt, - get_weights_accessor(data_path, total_path + "w.npy"), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - conv_pad_stride_info) - .set_name(param_path + "/pw") + .set_name(param_path + "/dw/bn") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "dw/relu") + + << ConvolutionLayer(1U, 1U, conv_filt, get_weights_accessor(data_path, total_path + "w.npy"), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), conv_pad_stride_info) + .set_name(param_path + "/pw") << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "bn_mean.npy"), get_weights_accessor(data_path, total_path + "bn_var.npy"), get_weights_accessor(data_path, total_path + "scale_w.npy"), get_weights_accessor(data_path, total_path + "scale_b.npy"), 0.00001f) - .set_name(param_path + "/pw/bn") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "pw/relu"); + .set_name(param_path + "/pw/bn") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "pw/relu"); return ConcatLayer(std::move(sg)); } - ConcatLayer get_node_B_float(IStream &master_graph, const std::string &data_path, std::string &¶m_path, - unsigned int conv_filt, - PadStrideInfo conv_pad_stride_info_1, PadStrideInfo conv_pad_stride_info_2) + ConcatLayer get_node_B_float(IStream &main_graph, + const std::string &data_path, + std::string &¶m_path, + unsigned int conv_filt, + PadStrideInfo conv_pad_stride_info_1, + PadStrideInfo conv_pad_stride_info_2) { const std::string total_path = param_path + "_"; - SubStream sg(master_graph); - - sg << ConvolutionLayer( - 1, 1, conv_filt / 2, - get_weights_accessor(data_path, total_path + "1_w.npy"), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - conv_pad_stride_info_1) - .set_name(total_path + "1/conv") + SubStream sg(main_graph); + + sg << ConvolutionLayer(1, 1, conv_filt / 2, get_weights_accessor(data_path, total_path + "1_w.npy"), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), conv_pad_stride_info_1) + .set_name(total_path + "1/conv") << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "1_bn_mean.npy"), get_weights_accessor(data_path, total_path + "1_bn_var.npy"), get_weights_accessor(data_path, total_path + "1_scale_w.npy"), get_weights_accessor(data_path, total_path + "1_scale_b.npy"), 0.00001f) - .set_name(total_path + "1/bn") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(total_path + "1/relu"); - - sg << ConvolutionLayer( - 3, 3, conv_filt, - get_weights_accessor(data_path, total_path + "2_w.npy"), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - conv_pad_stride_info_2) - .set_name(total_path + "2/conv") + .set_name(total_path + "1/bn") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(total_path + "1/relu"); + + sg << ConvolutionLayer(3, 3, conv_filt, get_weights_accessor(data_path, total_path + 
"2_w.npy"), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), conv_pad_stride_info_2) + .set_name(total_path + "2/conv") << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "2_bn_mean.npy"), get_weights_accessor(data_path, total_path + "2_bn_var.npy"), get_weights_accessor(data_path, total_path + "2_scale_w.npy"), get_weights_accessor(data_path, total_path + "2_scale_b.npy"), 0.00001f) - .set_name(total_path + "2/bn") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(total_path + "2/relu"); + .set_name(total_path + "2/bn") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(total_path + "2/relu"); return ConcatLayer(std::move(sg)); } - ConcatLayer get_node_C_float(IStream &master_graph, const std::string &data_path, std::string &¶m_path, - unsigned int conv_filt, PadStrideInfo conv_pad_stride_info) + ConcatLayer get_node_C_float(IStream &main_graph, + const std::string &data_path, + std::string &¶m_path, + unsigned int conv_filt, + PadStrideInfo conv_pad_stride_info) { const std::string total_path = param_path + "_"; - SubStream sg(master_graph); - sg << ConvolutionLayer( - 1U, 1U, conv_filt, - get_weights_accessor(data_path, total_path + "w.npy"), - get_weights_accessor(data_path, total_path + "b.npy"), - conv_pad_stride_info) - .set_name(param_path + "/conv"); - if(common_params.data_layout == DataLayout::NCHW) + SubStream sg(main_graph); + sg << ConvolutionLayer(1U, 1U, conv_filt, get_weights_accessor(data_path, total_path + "w.npy"), + get_weights_accessor(data_path, total_path + "b.npy"), conv_pad_stride_info) + .set_name(param_path + "/conv"); + if (common_params.data_layout == DataLayout::NCHW) { sg << PermuteLayer(PermutationVector(2U, 0U, 1U), DataLayout::NHWC).set_name(param_path + "/perm"); } @@ -215,62 +221,77 @@ private: void create_graph_float(TensorDescriptor &input_descriptor) { // Create a preprocessor object - const std::array<float, 3> mean_rgb{ { 127.5f, 127.5f, 127.5f } }; - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb, true, 0.007843f); + const std::array<float, 3> mean_rgb{{127.5f, 127.5f, 127.5f}}; + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<CaffePreproccessor>(mean_rgb, true, 0.007843f); // Get trainable parameters data path std::string data_path = common_params.data_path; // Add model path to data path - if(!data_path.empty()) + if (!data_path.empty()) { data_path += "/cnn_data/ssd_mobilenet_model/"; } - graph << InputLayer(input_descriptor, - get_input_accessor(common_params, std::move(preprocessor))); + graph << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor))); SubStream conv_11(graph); - conv_11 << ConvolutionLayer( - 3U, 3U, 32U, - get_weights_accessor(data_path, "conv0_w.npy"), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 1, 1)) - .set_name("conv0"); + conv_11 << ConvolutionLayer(3U, 3U, 32U, get_weights_accessor(data_path, "conv0_w.npy"), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(2, 2, 1, 1)) + .set_name("conv0"); conv_11 << BatchNormalizationLayer(get_weights_accessor(data_path, "conv0_bn_mean.npy"), get_weights_accessor(data_path, "conv0_bn_var.npy"), get_weights_accessor(data_path, "conv0_scale_w.npy"), get_weights_accessor(data_path, "conv0_scale_b.npy"), 0.00001f) - .set_name("conv0/bn") - << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv0/relu"); - - conv_11 << get_node_A_float(conv_11, data_path, "conv1", 64, PadStrideInfo(1, 1, 1, 1), PadStrideInfo(1, 1, 0, 0)); - conv_11 << get_node_A_float(conv_11, data_path, "conv2", 128, PadStrideInfo(2, 2, 1, 1), PadStrideInfo(1, 1, 0, 0)); - conv_11 << get_node_A_float(conv_11, data_path, "conv3", 128, PadStrideInfo(1, 1, 1, 1), PadStrideInfo(1, 1, 0, 0)); - conv_11 << get_node_A_float(conv_11, data_path, "conv4", 256, PadStrideInfo(2, 2, 1, 1), PadStrideInfo(1, 1, 0, 0)); - conv_11 << get_node_A_float(conv_11, data_path, "conv5", 256, PadStrideInfo(1, 1, 1, 1), PadStrideInfo(1, 1, 0, 0)); - conv_11 << get_node_A_float(conv_11, data_path, "conv6", 512, PadStrideInfo(2, 2, 1, 1), PadStrideInfo(1, 1, 0, 0)); - conv_11 << get_node_A_float(conv_11, data_path, "conv7", 512, PadStrideInfo(1, 1, 1, 1), PadStrideInfo(1, 1, 0, 0)); - conv_11 << get_node_A_float(conv_11, data_path, "conv8", 512, PadStrideInfo(1, 1, 1, 1), PadStrideInfo(1, 1, 0, 0)); - conv_11 << get_node_A_float(conv_11, data_path, "conv9", 512, PadStrideInfo(1, 1, 1, 1), PadStrideInfo(1, 1, 0, 0)); - conv_11 << get_node_A_float(conv_11, data_path, "conv10", 512, PadStrideInfo(1, 1, 1, 1), PadStrideInfo(1, 1, 0, 0)); - conv_11 << get_node_A_float(conv_11, data_path, "conv11", 512, PadStrideInfo(1, 1, 1, 1), PadStrideInfo(1, 1, 0, 0)); + .set_name("conv0/bn") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv0/relu"); + + conv_11 << get_node_A_float(conv_11, data_path, "conv1", 64, PadStrideInfo(1, 1, 1, 1), + PadStrideInfo(1, 1, 0, 0)); + conv_11 << get_node_A_float(conv_11, data_path, "conv2", 128, PadStrideInfo(2, 2, 1, 1), + PadStrideInfo(1, 1, 0, 0)); + conv_11 << get_node_A_float(conv_11, data_path, "conv3", 128, PadStrideInfo(1, 1, 1, 1), + PadStrideInfo(1, 1, 0, 0)); + conv_11 << get_node_A_float(conv_11, data_path, "conv4", 256, PadStrideInfo(2, 2, 1, 1), + PadStrideInfo(1, 1, 0, 0)); + conv_11 << get_node_A_float(conv_11, data_path, "conv5", 256, PadStrideInfo(1, 1, 1, 1), + PadStrideInfo(1, 1, 0, 0)); + conv_11 << get_node_A_float(conv_11, data_path, "conv6", 512, PadStrideInfo(2, 2, 1, 1), + PadStrideInfo(1, 1, 0, 0)); + conv_11 << get_node_A_float(conv_11, data_path, "conv7", 512, PadStrideInfo(1, 1, 1, 1), + PadStrideInfo(1, 1, 0, 0)); + conv_11 << get_node_A_float(conv_11, data_path, "conv8", 512, PadStrideInfo(1, 1, 1, 1), + PadStrideInfo(1, 1, 0, 0)); + conv_11 << get_node_A_float(conv_11, data_path, "conv9", 512, PadStrideInfo(1, 1, 1, 1), + PadStrideInfo(1, 1, 0, 0)); + conv_11 << get_node_A_float(conv_11, data_path, "conv10", 512, PadStrideInfo(1, 1, 1, 1), + PadStrideInfo(1, 1, 0, 0)); + conv_11 << get_node_A_float(conv_11, data_path, "conv11", 512, PadStrideInfo(1, 1, 1, 1), + PadStrideInfo(1, 1, 0, 0)); SubStream conv_13(conv_11); - conv_13 << get_node_A_float(conv_11, data_path, "conv12", 1024, PadStrideInfo(2, 2, 1, 1), PadStrideInfo(1, 1, 0, 0)); - conv_13 << get_node_A_float(conv_13, data_path, "conv13", 1024, PadStrideInfo(1, 1, 1, 1), PadStrideInfo(1, 1, 0, 0)); + conv_13 << get_node_A_float(conv_11, data_path, "conv12", 1024, PadStrideInfo(2, 2, 1, 1), + PadStrideInfo(1, 1, 0, 0)); + conv_13 << get_node_A_float(conv_13, data_path, "conv13", 1024, PadStrideInfo(1, 1, 1, 1), + PadStrideInfo(1, 1, 0, 0)); SubStream conv_14(conv_13); - conv_14 << get_node_B_float(conv_13, data_path, "conv14", 512, PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 2, 1, 
1)); + conv_14 << get_node_B_float(conv_13, data_path, "conv14", 512, PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(2, 2, 1, 1)); SubStream conv_15(conv_14); - conv_15 << get_node_B_float(conv_14, data_path, "conv15", 256, PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 2, 1, 1)); + conv_15 << get_node_B_float(conv_14, data_path, "conv15", 256, PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(2, 2, 1, 1)); SubStream conv_16(conv_15); - conv_16 << get_node_B_float(conv_15, data_path, "conv16", 256, PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 2, 1, 1)); + conv_16 << get_node_B_float(conv_15, data_path, "conv16", 256, PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(2, 2, 1, 1)); SubStream conv_17(conv_16); - conv_17 << get_node_B_float(conv_16, data_path, "conv17", 128, PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 2, 1, 1)); + conv_17 << get_node_B_float(conv_16, data_path, "conv17", 128, PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(2, 2, 1, 1)); //mbox_loc SubStream conv_11_mbox_loc(conv_11); @@ -292,8 +313,9 @@ private: conv_17_2_mbox_loc << get_node_C_float(conv_17, data_path, "conv17_2_mbox_loc", 24, PadStrideInfo(1, 1, 0, 0)); SubStream mbox_loc(graph); - mbox_loc << ConcatLayer(std::move(conv_11_mbox_loc), std::move(conv_13_mbox_loc), conv_14_2_mbox_loc, std::move(conv_15_2_mbox_loc), - std::move(conv_16_2_mbox_loc), std::move(conv_17_2_mbox_loc)); + mbox_loc << ConcatLayer(std::move(conv_11_mbox_loc), std::move(conv_13_mbox_loc), conv_14_2_mbox_loc, + std::move(conv_15_2_mbox_loc), std::move(conv_16_2_mbox_loc), + std::move(conv_17_2_mbox_loc)); //mbox_conf SubStream conv_11_mbox_conf(conv_11); @@ -303,67 +325,79 @@ private: conv_13_mbox_conf << get_node_C_float(conv_13, data_path, "conv13_mbox_conf", 126, PadStrideInfo(1, 1, 0, 0)); SubStream conv_14_2_mbox_conf(conv_14); - conv_14_2_mbox_conf << get_node_C_float(conv_14, data_path, "conv14_2_mbox_conf", 126, PadStrideInfo(1, 1, 0, 0)); + conv_14_2_mbox_conf << get_node_C_float(conv_14, data_path, "conv14_2_mbox_conf", 126, + PadStrideInfo(1, 1, 0, 0)); SubStream conv_15_2_mbox_conf(conv_15); - conv_15_2_mbox_conf << get_node_C_float(conv_15, data_path, "conv15_2_mbox_conf", 126, PadStrideInfo(1, 1, 0, 0)); + conv_15_2_mbox_conf << get_node_C_float(conv_15, data_path, "conv15_2_mbox_conf", 126, + PadStrideInfo(1, 1, 0, 0)); SubStream conv_16_2_mbox_conf(conv_16); - conv_16_2_mbox_conf << get_node_C_float(conv_16, data_path, "conv16_2_mbox_conf", 126, PadStrideInfo(1, 1, 0, 0)); + conv_16_2_mbox_conf << get_node_C_float(conv_16, data_path, "conv16_2_mbox_conf", 126, + PadStrideInfo(1, 1, 0, 0)); SubStream conv_17_2_mbox_conf(conv_17); - conv_17_2_mbox_conf << get_node_C_float(conv_17, data_path, "conv17_2_mbox_conf", 126, PadStrideInfo(1, 1, 0, 0)); + conv_17_2_mbox_conf << get_node_C_float(conv_17, data_path, "conv17_2_mbox_conf", 126, + PadStrideInfo(1, 1, 0, 0)); SubStream mbox_conf(graph); - mbox_conf << ConcatLayer(std::move(conv_11_mbox_conf), std::move(conv_13_mbox_conf), std::move(conv_14_2_mbox_conf), - std::move(conv_15_2_mbox_conf), std::move(conv_16_2_mbox_conf), std::move(conv_17_2_mbox_conf)); + mbox_conf << ConcatLayer(std::move(conv_11_mbox_conf), std::move(conv_13_mbox_conf), + std::move(conv_14_2_mbox_conf), std::move(conv_15_2_mbox_conf), + std::move(conv_16_2_mbox_conf), std::move(conv_17_2_mbox_conf)); mbox_conf << ReshapeLayer(TensorShape(21U, 1917U)).set_name("mbox_conf/reshape"); mbox_conf << SoftmaxLayer().set_name("mbox_conf/softmax"); mbox_conf << FlattenLayer().set_name("mbox_conf/flat"); - const std::vector<float> 
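// --- Note (a sketch, not part of the patch) on the two PadStrideInfo overloads
// used throughout these hunks; argument roles follow the arm_compute API:
PadStrideInfo sym_pad(1, 1, 1, 1);                   // stride_x, stride_y, pad_x, pad_y
PadStrideInfo asym_pad(2U, 2U,                       // stride_x, stride_y
                       0U, 1U, 0U, 1U,               // pad_left, pad_right, pad_top, pad_bottom
                       DimensionRoundingType::CEIL); // output-dimension rounding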
priorbox_variances = { 0.1f, 0.1f, 0.2f, 0.2f }; + const std::vector<float> priorbox_variances = {0.1f, 0.1f, 0.2f, 0.2f}; const float priorbox_offset = 0.5f; - const std::vector<float> priorbox_aspect_ratios = { 2.f, 3.f }; + const std::vector<float> priorbox_aspect_ratios = {2.f, 3.f}; //mbox_priorbox branch SubStream conv_11_mbox_priorbox(conv_11); conv_11_mbox_priorbox << PriorBoxLayer(SubStream(graph), - PriorBoxLayerInfo({ 60.f }, priorbox_variances, priorbox_offset, true, false, {}, { 2.f })) - .set_name("conv11/priorbox"); + PriorBoxLayerInfo({60.f}, priorbox_variances, priorbox_offset, true, + false, {}, {2.f})) + .set_name("conv11/priorbox"); SubStream conv_13_mbox_priorbox(conv_13); conv_13_mbox_priorbox << PriorBoxLayer(SubStream(graph), - PriorBoxLayerInfo({ 105.f }, priorbox_variances, priorbox_offset, true, false, { 150.f }, priorbox_aspect_ratios)) - .set_name("conv13/priorbox"); + PriorBoxLayerInfo({105.f}, priorbox_variances, priorbox_offset, true, + false, {150.f}, priorbox_aspect_ratios)) + .set_name("conv13/priorbox"); SubStream conv_14_2_mbox_priorbox(conv_14); conv_14_2_mbox_priorbox << PriorBoxLayer(SubStream(graph), - PriorBoxLayerInfo({ 150.f }, priorbox_variances, priorbox_offset, true, false, { 195.f }, priorbox_aspect_ratios)) - .set_name("conv14/priorbox"); + PriorBoxLayerInfo({150.f}, priorbox_variances, priorbox_offset, true, + false, {195.f}, priorbox_aspect_ratios)) + .set_name("conv14/priorbox"); SubStream conv_15_2_mbox_priorbox(conv_15); conv_15_2_mbox_priorbox << PriorBoxLayer(SubStream(graph), - PriorBoxLayerInfo({ 195.f }, priorbox_variances, priorbox_offset, true, false, { 240.f }, priorbox_aspect_ratios)) - .set_name("conv15/priorbox"); + PriorBoxLayerInfo({195.f}, priorbox_variances, priorbox_offset, true, + false, {240.f}, priorbox_aspect_ratios)) + .set_name("conv15/priorbox"); SubStream conv_16_2_mbox_priorbox(conv_16); conv_16_2_mbox_priorbox << PriorBoxLayer(SubStream(graph), - PriorBoxLayerInfo({ 240.f }, priorbox_variances, priorbox_offset, true, false, { 285.f }, priorbox_aspect_ratios)) - .set_name("conv16/priorbox"); + PriorBoxLayerInfo({240.f}, priorbox_variances, priorbox_offset, true, + false, {285.f}, priorbox_aspect_ratios)) + .set_name("conv16/priorbox"); SubStream conv_17_2_mbox_priorbox(conv_17); conv_17_2_mbox_priorbox << PriorBoxLayer(SubStream(graph), - PriorBoxLayerInfo({ 285.f }, priorbox_variances, priorbox_offset, true, false, { 300.f }, priorbox_aspect_ratios)) - .set_name("conv17/priorbox"); + PriorBoxLayerInfo({285.f}, priorbox_variances, priorbox_offset, true, + false, {300.f}, priorbox_aspect_ratios)) + .set_name("conv17/priorbox"); SubStream mbox_priorbox(graph); mbox_priorbox << ConcatLayer( - (common_params.data_layout == DataLayout::NCHW) ? arm_compute::graph::descriptors::ConcatLayerDescriptor(DataLayoutDimension::WIDTH) : arm_compute::graph::descriptors::ConcatLayerDescriptor( - DataLayoutDimension::CHANNEL), - std::move(conv_11_mbox_priorbox), std::move(conv_13_mbox_priorbox), std::move(conv_14_2_mbox_priorbox), - std::move(conv_15_2_mbox_priorbox), std::move(conv_16_2_mbox_priorbox), std::move(conv_17_2_mbox_priorbox)); + (common_params.data_layout == DataLayout::NCHW) + ? 
arm_compute::graph::descriptors::ConcatLayerDescriptor(DataLayoutDimension::WIDTH) + : arm_compute::graph::descriptors::ConcatLayerDescriptor(DataLayoutDimension::CHANNEL), + std::move(conv_11_mbox_priorbox), std::move(conv_13_mbox_priorbox), std::move(conv_14_2_mbox_priorbox), + std::move(conv_15_2_mbox_priorbox), std::move(conv_16_2_mbox_priorbox), std::move(conv_17_2_mbox_priorbox)); const int num_classes = 21; const bool share_location = true; @@ -376,77 +410,85 @@ private: SubStream detection_ouput(mbox_loc); detection_ouput << DetectionOutputLayer(std::move(mbox_conf), std::move(mbox_priorbox), - DetectionOutputLayerInfo(num_classes, share_location, detection_type, keep_top_k, nms_threshold, top_k, label_id_background, conf_thrs)); - detection_ouput << OutputLayer(get_detection_output_accessor(common_params, { input_descriptor.shape })); + DetectionOutputLayerInfo(num_classes, share_location, detection_type, + keep_top_k, nms_threshold, top_k, + label_id_background, conf_thrs)); + detection_ouput << OutputLayer(get_detection_output_accessor(common_params, {input_descriptor.shape})); } - ConcatLayer get_node_A_qasymm(IStream &master_graph, const std::string &data_path, std::string &¶m_path, - unsigned int conv_filt, - PadStrideInfo dwc_pad_stride_info, PadStrideInfo conv_pad_stride_info, - std::pair<QuantizationInfo, QuantizationInfo> depth_quant_info, std::pair<QuantizationInfo, QuantizationInfo> point_quant_info) + ConcatLayer get_node_A_qasymm(IStream &main_graph, + const std::string &data_path, + std::string &¶m_path, + unsigned int conv_filt, + PadStrideInfo dwc_pad_stride_info, + PadStrideInfo conv_pad_stride_info, + std::pair<QuantizationInfo, QuantizationInfo> depth_quant_info, + std::pair<QuantizationInfo, QuantizationInfo> point_quant_info) { const std::string total_path = param_path + "_"; - SubStream sg(master_graph); - - sg << DepthwiseConvolutionLayer( - 3U, 3U, - get_weights_accessor(data_path, total_path + "dw_w.npy"), - get_weights_accessor(data_path, total_path + "dw_b.npy"), - dwc_pad_stride_info, 1, depth_quant_info.first, depth_quant_info.second) - .set_name(param_path + "/dw") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)).set_name(param_path + "/dw/relu6"); - - sg << ConvolutionLayer( - 1U, 1U, conv_filt, - get_weights_accessor(data_path, total_path + "w.npy"), - get_weights_accessor(data_path, total_path + "b.npy"), - conv_pad_stride_info, 1, point_quant_info.first, point_quant_info.second) - .set_name(param_path + "/pw") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)).set_name(param_path + "/pw/relu6"); + SubStream sg(main_graph); + + sg << DepthwiseConvolutionLayer(3U, 3U, get_weights_accessor(data_path, total_path + "dw_w.npy"), + get_weights_accessor(data_path, total_path + "dw_b.npy"), dwc_pad_stride_info, + 1, depth_quant_info.first, depth_quant_info.second) + .set_name(param_path + "/dw") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)) + .set_name(param_path + "/dw/relu6"); + + sg << ConvolutionLayer(1U, 1U, conv_filt, get_weights_accessor(data_path, total_path + "w.npy"), + get_weights_accessor(data_path, total_path + "b.npy"), conv_pad_stride_info, 1, + point_quant_info.first, point_quant_info.second) + .set_name(param_path + "/pw") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)) + .set_name(param_path + "/pw/relu6"); return 
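// --- Illustrative sketch: how the constants above feed DetectionOutputLayerInfo.
// num_classes and share_location are shown in this hunk; the remaining parameters
// (detection_type, keep_top_k, nms_threshold, top_k, label_id_background,
// conf_thrs) are defined in elided lines of the same example and are only
// referenced symbolically here. detection_info is a placeholder name.
DetectionOutputLayerInfo detection_info(num_classes, share_location, detection_type,
                                        keep_top_k, nms_threshold, top_k,
                                        label_id_background, conf_thrs);
detection_ouput << DetectionOutputLayer(std::move(mbox_conf), std::move(mbox_priorbox), detection_info);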
ConcatLayer(std::move(sg)); } - ConcatLayer get_node_B_qasymm(IStream &master_graph, const std::string &data_path, std::string &¶m_path, - unsigned int conv_filt, - PadStrideInfo conv_pad_stride_info_1x1, PadStrideInfo conv_pad_stride_info_3x3, - const std::pair<QuantizationInfo, QuantizationInfo> quant_info_1x1, const std::pair<QuantizationInfo, QuantizationInfo> quant_info_3x3) + ConcatLayer get_node_B_qasymm(IStream &main_graph, + const std::string &data_path, + std::string &¶m_path, + unsigned int conv_filt, + PadStrideInfo conv_pad_stride_info_1x1, + PadStrideInfo conv_pad_stride_info_3x3, + const std::pair<QuantizationInfo, QuantizationInfo> quant_info_1x1, + const std::pair<QuantizationInfo, QuantizationInfo> quant_info_3x3) { const std::string total_path = param_path + "_"; - SubStream sg(master_graph); - - sg << ConvolutionLayer( - 1, 1, conv_filt / 2, - get_weights_accessor(data_path, total_path + "1x1_w.npy"), - get_weights_accessor(data_path, total_path + "1x1_b.npy"), - conv_pad_stride_info_1x1, 1, quant_info_1x1.first, quant_info_1x1.second) - .set_name(total_path + "1x1/conv") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)).set_name(total_path + "1x1/conv/relu6"); - - sg << ConvolutionLayer( - 3, 3, conv_filt, - get_weights_accessor(data_path, total_path + "3x3_w.npy"), - get_weights_accessor(data_path, total_path + "3x3_b.npy"), - conv_pad_stride_info_3x3, 1, quant_info_3x3.first, quant_info_3x3.second) - .set_name(total_path + "3x3/conv") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)).set_name(total_path + "3x3/conv/relu6"); + SubStream sg(main_graph); + + sg << ConvolutionLayer(1, 1, conv_filt / 2, get_weights_accessor(data_path, total_path + "1x1_w.npy"), + get_weights_accessor(data_path, total_path + "1x1_b.npy"), conv_pad_stride_info_1x1, 1, + quant_info_1x1.first, quant_info_1x1.second) + .set_name(total_path + "1x1/conv") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)) + .set_name(total_path + "1x1/conv/relu6"); + + sg << ConvolutionLayer(3, 3, conv_filt, get_weights_accessor(data_path, total_path + "3x3_w.npy"), + get_weights_accessor(data_path, total_path + "3x3_b.npy"), conv_pad_stride_info_3x3, 1, + quant_info_3x3.first, quant_info_3x3.second) + .set_name(total_path + "3x3/conv") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)) + .set_name(total_path + "3x3/conv/relu6"); return ConcatLayer(std::move(sg)); } - ConcatLayer get_node_C_qasymm(IStream &master_graph, const std::string &data_path, std::string &¶m_path, - unsigned int conv_filt, PadStrideInfo conv_pad_stride_info, - const std::pair<QuantizationInfo, QuantizationInfo> quant_info, TensorShape reshape_shape) + ConcatLayer get_node_C_qasymm(IStream &main_graph, + const std::string &data_path, + std::string &¶m_path, + unsigned int conv_filt, + PadStrideInfo conv_pad_stride_info, + const std::pair<QuantizationInfo, QuantizationInfo> quant_info, + TensorShape reshape_shape) { const std::string total_path = param_path + "_"; - SubStream sg(master_graph); - sg << ConvolutionLayer( - 1U, 1U, conv_filt, - get_weights_accessor(data_path, total_path + "w.npy"), - get_weights_accessor(data_path, total_path + "b.npy"), - conv_pad_stride_info, 1, quant_info.first, quant_info.second) - .set_name(param_path + "/conv"); - if(common_params.data_layout == DataLayout::NCHW) + SubStream sg(main_graph); + sg << 
ConvolutionLayer(1U, 1U, conv_filt, get_weights_accessor(data_path, total_path + "w.npy"), + get_weights_accessor(data_path, total_path + "b.npy"), conv_pad_stride_info, 1, + quant_info.first, quant_info.second) + .set_name(param_path + "/conv"); + if (common_params.data_layout == DataLayout::NCHW) { sg << PermuteLayer(PermutationVector(2U, 0U, 1U), DataLayout::NHWC); } @@ -461,57 +503,59 @@ private: std::string data_path = common_params.data_path; // Add model path to data path - if(!data_path.empty()) + if (!data_path.empty()) { data_path += "/cnn_data/ssd_mobilenet_qasymm8_model/"; } // Quantization info are saved as pair for each (pointwise/depthwise) convolution layer: <weight_quant_info, output_quant_info> - const std::vector<std::pair<QuantizationInfo, QuantizationInfo>> conv_quant_info = - { - { QuantizationInfo(0.03624850884079933f, 163), QuantizationInfo(0.22219789028167725f, 113) }, // conv0 - { QuantizationInfo(0.0028752065263688564f, 113), QuantizationInfo(0.05433657020330429f, 128) }, // conv13_2_1_1 - { QuantizationInfo(0.0014862528769299388f, 125), QuantizationInfo(0.05037643015384674f, 131) }, // conv13_2_3_3 - { QuantizationInfo(0.00233650766313076f, 113), QuantizationInfo(0.04468846693634987f, 126) }, // conv13_3_1_1 - { QuantizationInfo(0.002501056529581547f, 120), QuantizationInfo(0.06026708707213402f, 111) }, // conv13_3_3_3 - { QuantizationInfo(0.002896666992455721f, 121), QuantizationInfo(0.037775348871946335f, 117) }, // conv13_4_1_1 - { QuantizationInfo(0.0023875406477600336f, 122), QuantizationInfo(0.03881589323282242f, 108) }, // conv13_4_3_3 - { QuantizationInfo(0.0022081052884459496f, 77), QuantizationInfo(0.025450613349676132f, 125) }, // conv13_5_1_1 - { QuantizationInfo(0.00604657270014286f, 121), QuantizationInfo(0.033533502370119095f, 109) } // conv13_5_3_3 + const std::vector<std::pair<QuantizationInfo, QuantizationInfo>> conv_quant_info = { + {QuantizationInfo(0.03624850884079933f, 163), QuantizationInfo(0.22219789028167725f, 113)}, // conv0 + {QuantizationInfo(0.0028752065263688564f, 113), + QuantizationInfo(0.05433657020330429f, 128)}, // conv13_2_1_1 + {QuantizationInfo(0.0014862528769299388f, 125), + QuantizationInfo(0.05037643015384674f, 131)}, // conv13_2_3_3 + {QuantizationInfo(0.00233650766313076f, 113), QuantizationInfo(0.04468846693634987f, 126)}, // conv13_3_1_1 + {QuantizationInfo(0.002501056529581547f, 120), QuantizationInfo(0.06026708707213402f, 111)}, // conv13_3_3_3 + {QuantizationInfo(0.002896666992455721f, 121), + QuantizationInfo(0.037775348871946335f, 117)}, // conv13_4_1_1 + {QuantizationInfo(0.0023875406477600336f, 122), + QuantizationInfo(0.03881589323282242f, 108)}, // conv13_4_3_3 + {QuantizationInfo(0.0022081052884459496f, 77), + QuantizationInfo(0.025450613349676132f, 125)}, // conv13_5_1_1 + {QuantizationInfo(0.00604657270014286f, 121), QuantizationInfo(0.033533502370119095f, 109)} // conv13_5_3_3 }; - const std::vector<std::pair<QuantizationInfo, QuantizationInfo>> depth_quant_info = - { - { QuantizationInfo(0.03408717364072f, 131), QuantizationInfo(0.29286590218544006f, 108) }, // dwsc1 - { QuantizationInfo(0.027518004179000854f, 107), QuantizationInfo(0.20796941220760345, 117) }, // dwsc2 - { QuantizationInfo(0.052489638328552246f, 85), QuantizationInfo(0.4303881824016571f, 142) }, // dwsc3 - { QuantizationInfo(0.016570359468460083f, 79), QuantizationInfo(0.10512150079011917f, 116) }, // dwsc4 - { QuantizationInfo(0.060739465057849884f, 65), QuantizationInfo(0.15331414341926575f, 94) }, // dwsc5 - { 
QuantizationInfo(0.01324534136801958f, 124), QuantizationInfo(0.13010895252227783f, 153) }, // dwsc6 - { QuantizationInfo(0.032326459884643555f, 124), QuantizationInfo(0.11565316468477249, 156) }, // dwsc7 - { QuantizationInfo(0.029948478564620018f, 155), QuantizationInfo(0.11413891613483429f, 146) }, // dwsc8 - { QuantizationInfo(0.028054025024175644f, 129), QuantizationInfo(0.1142905130982399f, 140) }, // dwsc9 - { QuantizationInfo(0.025204822421073914f, 129), QuantizationInfo(0.14668069779872894f, 149) }, // dwsc10 - { QuantizationInfo(0.019332280382514f, 110), QuantizationInfo(0.1480235457420349f, 91) }, // dwsc11 - { QuantizationInfo(0.0319712869822979f, 88), QuantizationInfo(0.10424695909023285f, 117) }, // dwsc12 - { QuantizationInfo(0.04378943517804146f, 164), QuantizationInfo(0.23176774382591248f, 138) } // dwsc13 + const std::vector<std::pair<QuantizationInfo, QuantizationInfo>> depth_quant_info = { + {QuantizationInfo(0.03408717364072f, 131), QuantizationInfo(0.29286590218544006f, 108)}, // dwsc1 + {QuantizationInfo(0.027518004179000854f, 107), QuantizationInfo(0.20796941220760345, 117)}, // dwsc2 + {QuantizationInfo(0.052489638328552246f, 85), QuantizationInfo(0.4303881824016571f, 142)}, // dwsc3 + {QuantizationInfo(0.016570359468460083f, 79), QuantizationInfo(0.10512150079011917f, 116)}, // dwsc4 + {QuantizationInfo(0.060739465057849884f, 65), QuantizationInfo(0.15331414341926575f, 94)}, // dwsc5 + {QuantizationInfo(0.01324534136801958f, 124), QuantizationInfo(0.13010895252227783f, 153)}, // dwsc6 + {QuantizationInfo(0.032326459884643555f, 124), QuantizationInfo(0.11565316468477249, 156)}, // dwsc7 + {QuantizationInfo(0.029948478564620018f, 155), QuantizationInfo(0.11413891613483429f, 146)}, // dwsc8 + {QuantizationInfo(0.028054025024175644f, 129), QuantizationInfo(0.1142905130982399f, 140)}, // dwsc9 + {QuantizationInfo(0.025204822421073914f, 129), QuantizationInfo(0.14668069779872894f, 149)}, // dwsc10 + {QuantizationInfo(0.019332280382514f, 110), QuantizationInfo(0.1480235457420349f, 91)}, // dwsc11 + {QuantizationInfo(0.0319712869822979f, 88), QuantizationInfo(0.10424695909023285f, 117)}, // dwsc12 + {QuantizationInfo(0.04378943517804146f, 164), QuantizationInfo(0.23176774382591248f, 138)} // dwsc13 }; - const std::vector<std::pair<QuantizationInfo, QuantizationInfo>> point_quant_info = - { - { QuantizationInfo(0.028777318075299263f, 144), QuantizationInfo(0.2663874328136444f, 121) }, // pw1 - { QuantizationInfo(0.015796702355146408f, 127), QuantizationInfo(0.1739964485168457f, 111) }, // pw2 - { QuantizationInfo(0.009349990636110306f, 127), QuantizationInfo(0.1805974692106247f, 104) }, // pw3 - { QuantizationInfo(0.012920888140797615f, 106), QuantizationInfo(0.1205204650759697f, 100) }, // pw4 - { QuantizationInfo(0.008119508624076843f, 145), QuantizationInfo(0.12272439152002335f, 97) }, // pw5 - { QuantizationInfo(0.0070041813887655735f, 115), QuantizationInfo(0.0947074219584465f, 101) }, // pw6 - { QuantizationInfo(0.004827278666198254f, 115), QuantizationInfo(0.0842885747551918f, 110) }, // pw7 - { QuantizationInfo(0.004755120258778334f, 128), QuantizationInfo(0.08283159881830215f, 116) }, // pw8 - { QuantizationInfo(0.007527193054556847f, 142), QuantizationInfo(0.12555131316184998f, 137) }, // pw9 - { QuantizationInfo(0.006050156895071268f, 109), QuantizationInfo(0.10871313512325287f, 124) }, // pw10 - { QuantizationInfo(0.00490700313821435f, 127), QuantizationInfo(0.10364262014627457f, 140) }, // pw11 - { QuantizationInfo(0.006063731852918863, 124), 
QuantizationInfo(0.11241862177848816f, 125) }, // pw12 - { QuantizationInfo(0.007901716977357864f, 139), QuantizationInfo(0.49889302253723145f, 141) } // pw13 + const std::vector<std::pair<QuantizationInfo, QuantizationInfo>> point_quant_info = { + {QuantizationInfo(0.028777318075299263f, 144), QuantizationInfo(0.2663874328136444f, 121)}, // pw1 + {QuantizationInfo(0.015796702355146408f, 127), QuantizationInfo(0.1739964485168457f, 111)}, // pw2 + {QuantizationInfo(0.009349990636110306f, 127), QuantizationInfo(0.1805974692106247f, 104)}, // pw3 + {QuantizationInfo(0.012920888140797615f, 106), QuantizationInfo(0.1205204650759697f, 100)}, // pw4 + {QuantizationInfo(0.008119508624076843f, 145), QuantizationInfo(0.12272439152002335f, 97)}, // pw5 + {QuantizationInfo(0.0070041813887655735f, 115), QuantizationInfo(0.0947074219584465f, 101)}, // pw6 + {QuantizationInfo(0.004827278666198254f, 115), QuantizationInfo(0.0842885747551918f, 110)}, // pw7 + {QuantizationInfo(0.004755120258778334f, 128), QuantizationInfo(0.08283159881830215f, 116)}, // pw8 + {QuantizationInfo(0.007527193054556847f, 142), QuantizationInfo(0.12555131316184998f, 137)}, // pw9 + {QuantizationInfo(0.006050156895071268f, 109), QuantizationInfo(0.10871313512325287f, 124)}, // pw10 + {QuantizationInfo(0.00490700313821435f, 127), QuantizationInfo(0.10364262014627457f, 140)}, // pw11 + {QuantizationInfo(0.006063731852918863, 124), QuantizationInfo(0.11241862177848816f, 125)}, // pw12 + {QuantizationInfo(0.007901716977357864f, 139), QuantizationInfo(0.49889302253723145f, 141)} // pw13 }; // Quantization info taken from the TfLite SSD MobileNet example @@ -519,114 +563,154 @@ private: // Create core graph graph << InputLayer(input_descriptor.set_quantization_info(in_quant_info), get_weights_accessor(data_path, common_params.image, DataLayout::NHWC)); - graph << ConvolutionLayer( - 3U, 3U, 32U, - get_weights_accessor(data_path, "conv0_w.npy"), - get_weights_accessor(data_path, "conv0_b.npy"), - PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::CEIL), 1, conv_quant_info.at(0).first, conv_quant_info.at(0).second) - .set_name("conv0"); - graph << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)).set_name("conv0/relu"); - graph << get_node_A_qasymm(graph, data_path, "conv1", 64U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(0), - point_quant_info.at(0)); - graph << get_node_A_qasymm(graph, data_path, "conv2", 128U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::CEIL), PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(1), - point_quant_info.at(1)); - graph << get_node_A_qasymm(graph, data_path, "conv3", 128U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(2), - point_quant_info.at(2)); - graph << get_node_A_qasymm(graph, data_path, "conv4", 256U, PadStrideInfo(2U, 2U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(3), - point_quant_info.at(3)); - graph << get_node_A_qasymm(graph, data_path, "conv5", 256U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(4), - point_quant_info.at(4)); - graph << get_node_A_qasymm(graph, data_path, "conv6", 512U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::CEIL), PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(5), - point_quant_info.at(5)); - graph << 
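// Each get_node_A_qasymm call appends one depthwise-separable block (a 3x3
// depthwise convolution followed by a 1x1 pointwise convolution, per the
// dwsc/pw naming above), consuming one pair from depth_quant_info and one
// from point_quant_info. A QASYMM8 QuantizationInfo(scale, offset) maps a
// stored integer q back to real = scale * (q - offset); a minimal sketch of
// that mapping, assuming arm_compute's QuantizationInfo::uniform():
//
//   float dequantize(uint8_t q, const QuantizationInfo &qi)
//   {
//       const UniformQuantizationInfo uqi = qi.uniform();
//       return uqi.scale * (static_cast<int>(q) - uqi.offset);
//   }
//
// e.g. the conv0 output pair QuantizationInfo(0.2222f, 113) places real 0.0
// at q = 113.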
get_node_A_qasymm(graph, data_path, "conv7", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(6), - point_quant_info.at(6)); - graph << get_node_A_qasymm(graph, data_path, "conv8", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(7), - point_quant_info.at(7)); - graph << get_node_A_qasymm(graph, data_path, "conv9", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(8), - point_quant_info.at(8)); - graph << get_node_A_qasymm(graph, data_path, "conv10", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(9), - point_quant_info.at(9)); - graph << get_node_A_qasymm(graph, data_path, "conv11", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(10), - point_quant_info.at(10)); + graph << ConvolutionLayer(3U, 3U, 32U, get_weights_accessor(data_path, "conv0_w.npy"), + get_weights_accessor(data_path, "conv0_b.npy"), + PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::CEIL), 1, + conv_quant_info.at(0).first, conv_quant_info.at(0).second) + .set_name("conv0"); + graph << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)) + .set_name("conv0/relu"); + graph << get_node_A_qasymm(graph, data_path, "conv1", 64U, + PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), + PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(0), point_quant_info.at(0)); + graph << get_node_A_qasymm(graph, data_path, "conv2", 128U, + PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::CEIL), + PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(1), point_quant_info.at(1)); + graph << get_node_A_qasymm(graph, data_path, "conv3", 128U, + PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), + PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(2), point_quant_info.at(2)); + graph << get_node_A_qasymm(graph, data_path, "conv4", 256U, + PadStrideInfo(2U, 2U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), + PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(3), point_quant_info.at(3)); + graph << get_node_A_qasymm(graph, data_path, "conv5", 256U, + PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), + PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(4), point_quant_info.at(4)); + graph << get_node_A_qasymm(graph, data_path, "conv6", 512U, + PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::CEIL), + PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(5), point_quant_info.at(5)); + graph << get_node_A_qasymm(graph, data_path, "conv7", 512U, + PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), + PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(6), point_quant_info.at(6)); + graph << get_node_A_qasymm(graph, data_path, "conv8", 512U, + PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), + PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(7), point_quant_info.at(7)); + graph << get_node_A_qasymm(graph, data_path, "conv9", 512U, + PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), + PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(8), point_quant_info.at(8)); + graph << get_node_A_qasymm(graph, data_path, "conv10", 512U, + PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), + 
PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(9), point_quant_info.at(9)); + graph << get_node_A_qasymm(graph, data_path, "conv11", 512U, + PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), + PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(10), point_quant_info.at(10)); SubStream conv_13(graph); - conv_13 << get_node_A_qasymm(graph, data_path, "conv12", 1024U, PadStrideInfo(2U, 2U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(11), - point_quant_info.at(11)); - conv_13 << get_node_A_qasymm(conv_13, data_path, "conv13", 1024U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(12), - point_quant_info.at(12)); + conv_13 << get_node_A_qasymm(graph, data_path, "conv12", 1024U, + PadStrideInfo(2U, 2U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), + PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(11), point_quant_info.at(11)); + conv_13 << get_node_A_qasymm(conv_13, data_path, "conv13", 1024U, + PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), + PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(12), point_quant_info.at(12)); SubStream conv_14(conv_13); - conv_14 << get_node_B_qasymm(conv_13, data_path, "conv13_2", 512U, PadStrideInfo(1U, 1U, 0U, 0U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::CEIL), conv_quant_info.at(1), - conv_quant_info.at(2)); + conv_14 << get_node_B_qasymm(conv_13, data_path, "conv13_2", 512U, PadStrideInfo(1U, 1U, 0U, 0U), + PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::CEIL), + conv_quant_info.at(1), conv_quant_info.at(2)); SubStream conv_15(conv_14); - conv_15 << get_node_B_qasymm(conv_14, data_path, "conv13_3", 256U, PadStrideInfo(1U, 1U, 0U, 0U), PadStrideInfo(2U, 2U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), conv_quant_info.at(3), - conv_quant_info.at(4)); + conv_15 << get_node_B_qasymm(conv_14, data_path, "conv13_3", 256U, PadStrideInfo(1U, 1U, 0U, 0U), + PadStrideInfo(2U, 2U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), + conv_quant_info.at(3), conv_quant_info.at(4)); SubStream conv_16(conv_15); - conv_16 << get_node_B_qasymm(conv_15, data_path, "conv13_4", 256U, PadStrideInfo(1U, 1U, 0U, 0U), PadStrideInfo(2U, 2U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), conv_quant_info.at(5), - conv_quant_info.at(6)); + conv_16 << get_node_B_qasymm(conv_15, data_path, "conv13_4", 256U, PadStrideInfo(1U, 1U, 0U, 0U), + PadStrideInfo(2U, 2U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), + conv_quant_info.at(5), conv_quant_info.at(6)); SubStream conv_17(conv_16); - conv_17 << get_node_B_qasymm(conv_16, data_path, "conv13_5", 128U, PadStrideInfo(1U, 1U, 0U, 0U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::CEIL), conv_quant_info.at(7), - conv_quant_info.at(8)); + conv_17 << get_node_B_qasymm(conv_16, data_path, "conv13_5", 128U, PadStrideInfo(1U, 1U, 0U, 0U), + PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::CEIL), + conv_quant_info.at(7), conv_quant_info.at(8)); // box_predictor - const std::vector<std::pair<QuantizationInfo, QuantizationInfo>> box_enc_pred_quant_info = - { - { QuantizationInfo(0.005202020984143019f, 136), QuantizationInfo(0.08655580133199692f, 183) }, // boxpredictor0_bep - { QuantizationInfo(0.003121797926723957f, 132), QuantizationInfo(0.03218776360154152f, 140) }, // boxpredictor1_bep - { QuantizationInfo(0.002995674265548587f, 130), QuantizationInfo(0.029072262346744537f, 125) }, // boxpredictor2_bep - { 
QuantizationInfo(0.0023131705820560455f, 130), QuantizationInfo(0.026488754898309708f, 127) }, // boxpredictor3_bep - { QuantizationInfo(0.0013905081432312727f, 132), QuantizationInfo(0.0199890099465847f, 137) }, // boxpredictor4_bep - { QuantizationInfo(0.00216794665902853f, 121), QuantizationInfo(0.019798893481492996f, 151) } // boxpredictor5_bep + const std::vector<std::pair<QuantizationInfo, QuantizationInfo>> box_enc_pred_quant_info = { + {QuantizationInfo(0.005202020984143019f, 136), + QuantizationInfo(0.08655580133199692f, 183)}, // boxpredictor0_bep + {QuantizationInfo(0.003121797926723957f, 132), + QuantizationInfo(0.03218776360154152f, 140)}, // boxpredictor1_bep + {QuantizationInfo(0.002995674265548587f, 130), + QuantizationInfo(0.029072262346744537f, 125)}, // boxpredictor2_bep + {QuantizationInfo(0.0023131705820560455f, 130), + QuantizationInfo(0.026488754898309708f, 127)}, // boxpredictor3_bep + {QuantizationInfo(0.0013905081432312727f, 132), + QuantizationInfo(0.0199890099465847f, 137)}, // boxpredictor4_bep + {QuantizationInfo(0.00216794665902853f, 121), + QuantizationInfo(0.019798893481492996f, 151)} // boxpredictor5_bep }; const std::vector<TensorShape> box_reshape = // NHWC - { - TensorShape(4U, 1U, 1083U), // boxpredictor0_bep_reshape - TensorShape(4U, 1U, 600U), // boxpredictor1_bep_reshape - TensorShape(4U, 1U, 150U), // boxpredictor2_bep_reshape - TensorShape(4U, 1U, 54U), // boxpredictor3_bep_reshape - TensorShape(4U, 1U, 24U), // boxpredictor4_bep_reshape - TensorShape(4U, 1U, 6U) // boxpredictor5_bep_reshape - }; + { + TensorShape(4U, 1U, 1083U), // boxpredictor0_bep_reshape + TensorShape(4U, 1U, 600U), // boxpredictor1_bep_reshape + TensorShape(4U, 1U, 150U), // boxpredictor2_bep_reshape + TensorShape(4U, 1U, 54U), // boxpredictor3_bep_reshape + TensorShape(4U, 1U, 24U), // boxpredictor4_bep_reshape + TensorShape(4U, 1U, 6U) // boxpredictor5_bep_reshape + }; SubStream conv_11_box_enc_pre(graph); - conv_11_box_enc_pre << get_node_C_qasymm(graph, data_path, "BoxPredictor_0_BEP", 12U, PadStrideInfo(1U, 1U, 0U, 0U), box_enc_pred_quant_info.at(0), box_reshape.at(0)); + conv_11_box_enc_pre << get_node_C_qasymm(graph, data_path, "BoxPredictor_0_BEP", 12U, + PadStrideInfo(1U, 1U, 0U, 0U), box_enc_pred_quant_info.at(0), + box_reshape.at(0)); SubStream conv_13_box_enc_pre(conv_13); - conv_13_box_enc_pre << get_node_C_qasymm(conv_13, data_path, "BoxPredictor_1_BEP", 24U, PadStrideInfo(1U, 1U, 0U, 0U), box_enc_pred_quant_info.at(1), box_reshape.at(1)); + conv_13_box_enc_pre << get_node_C_qasymm(conv_13, data_path, "BoxPredictor_1_BEP", 24U, + PadStrideInfo(1U, 1U, 0U, 0U), box_enc_pred_quant_info.at(1), + box_reshape.at(1)); SubStream conv_14_2_box_enc_pre(conv_14); - conv_14_2_box_enc_pre << get_node_C_qasymm(conv_14, data_path, "BoxPredictor_2_BEP", 24U, PadStrideInfo(1U, 1U, 0U, 0U), box_enc_pred_quant_info.at(2), box_reshape.at(2)); + conv_14_2_box_enc_pre << get_node_C_qasymm(conv_14, data_path, "BoxPredictor_2_BEP", 24U, + PadStrideInfo(1U, 1U, 0U, 0U), box_enc_pred_quant_info.at(2), + box_reshape.at(2)); SubStream conv_15_2_box_enc_pre(conv_15); - conv_15_2_box_enc_pre << get_node_C_qasymm(conv_15, data_path, "BoxPredictor_3_BEP", 24U, PadStrideInfo(1U, 1U, 0U, 0U), box_enc_pred_quant_info.at(3), box_reshape.at(3)); + conv_15_2_box_enc_pre << get_node_C_qasymm(conv_15, data_path, "BoxPredictor_3_BEP", 24U, + PadStrideInfo(1U, 1U, 0U, 0U), box_enc_pred_quant_info.at(3), + box_reshape.at(3)); SubStream conv_16_2_box_enc_pre(conv_16); - conv_16_2_box_enc_pre << 
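// Box encodings are predicted from six feature maps of decreasing resolution.
// Each head flattens to 4 coordinates per anchor, and the box_reshape shapes
// above give 1083 + 600 + 150 + 54 + 24 + 6 = 1917 anchors in total, matching
// the ReshapeLayer(TensorShape(4U, 1917U)) applied after the concatenation
// below.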
get_node_C_qasymm(conv_16, data_path, "BoxPredictor_4_BEP", 24U, PadStrideInfo(1U, 1U, 0U, 0U), box_enc_pred_quant_info.at(4), box_reshape.at(4)); + conv_16_2_box_enc_pre << get_node_C_qasymm(conv_16, data_path, "BoxPredictor_4_BEP", 24U, + PadStrideInfo(1U, 1U, 0U, 0U), box_enc_pred_quant_info.at(4), + box_reshape.at(4)); SubStream conv_17_2_box_enc_pre(conv_17); - conv_17_2_box_enc_pre << get_node_C_qasymm(conv_17, data_path, "BoxPredictor_5_BEP", 24U, PadStrideInfo(1U, 1U, 0U, 0U), box_enc_pred_quant_info.at(5), box_reshape.at(5)); + conv_17_2_box_enc_pre << get_node_C_qasymm(conv_17, data_path, "BoxPredictor_5_BEP", 24U, + PadStrideInfo(1U, 1U, 0U, 0U), box_enc_pred_quant_info.at(5), + box_reshape.at(5)); SubStream box_enc_pre(graph); const QuantizationInfo bep_concate_qinfo = QuantizationInfo(0.08655580133199692f, 183); - box_enc_pre << ConcatLayer(arm_compute::graph::descriptors::ConcatLayerDescriptor(DataLayoutDimension::HEIGHT, bep_concate_qinfo), - std::move(conv_11_box_enc_pre), std::move(conv_13_box_enc_pre), conv_14_2_box_enc_pre, std::move(conv_15_2_box_enc_pre), + box_enc_pre << ConcatLayer(arm_compute::graph::descriptors::ConcatLayerDescriptor(DataLayoutDimension::HEIGHT, + bep_concate_qinfo), + std::move(conv_11_box_enc_pre), std::move(conv_13_box_enc_pre), + conv_14_2_box_enc_pre, std::move(conv_15_2_box_enc_pre), std::move(conv_16_2_box_enc_pre), std::move(conv_17_2_box_enc_pre)) - .set_name("BoxPredictor/concat"); + .set_name("BoxPredictor/concat"); box_enc_pre << ReshapeLayer(TensorShape(4U, 1917U)).set_name("BoxPredictor/reshape"); // class_predictor - const std::vector<std::pair<QuantizationInfo, QuantizationInfo>> class_pred_quant_info = - { - { QuantizationInfo(0.002744135679677129f, 125), QuantizationInfo(0.05746262148022652f, 234) }, // boxpredictor0_cp - { QuantizationInfo(0.0024326108396053314f, 80), QuantizationInfo(0.03764628246426582f, 217) }, // boxpredictor1_cp - { QuantizationInfo(0.0013898586621508002f, 141), QuantizationInfo(0.034081317484378815f, 214) }, // boxpredictor2_cp - { QuantizationInfo(0.0014176908880472183f, 133), QuantizationInfo(0.033889178186655045f, 215) }, // boxpredictor3_cp - { QuantizationInfo(0.001090311910957098f, 125), QuantizationInfo(0.02646234817802906f, 230) }, // boxpredictor4_cp - { QuantizationInfo(0.001134163816459477f, 115), QuantizationInfo(0.026926767081022263f, 218) } // boxpredictor5_cp + const std::vector<std::pair<QuantizationInfo, QuantizationInfo>> class_pred_quant_info = { + {QuantizationInfo(0.002744135679677129f, 125), + QuantizationInfo(0.05746262148022652f, 234)}, // boxpredictor0_cp + {QuantizationInfo(0.0024326108396053314f, 80), + QuantizationInfo(0.03764628246426582f, 217)}, // boxpredictor1_cp + {QuantizationInfo(0.0013898586621508002f, 141), + QuantizationInfo(0.034081317484378815f, 214)}, // boxpredictor2_cp + {QuantizationInfo(0.0014176908880472183f, 133), + QuantizationInfo(0.033889178186655045f, 215)}, // boxpredictor3_cp + {QuantizationInfo(0.001090311910957098f, 125), + QuantizationInfo(0.02646234817802906f, 230)}, // boxpredictor4_cp + {QuantizationInfo(0.001134163816459477f, 115), + QuantizationInfo(0.026926767081022263f, 218)} // boxpredictor5_cp }; - const std::vector<TensorShape> class_reshape = - { + const std::vector<TensorShape> class_reshape = { TensorShape(91U, 1083U), // boxpredictor0_cp_reshape TensorShape(91U, 600U), // boxpredictor1_cp_reshape TensorShape(91U, 150U), // boxpredictor2_cp_reshape @@ -636,60 +720,81 @@ private: }; SubStream conv_11_class_pre(graph); - conv_11_class_pre << 
get_node_C_qasymm(graph, data_path, "BoxPredictor_0_CP", 273U, PadStrideInfo(1U, 1U, 0U, 0U), class_pred_quant_info.at(0), class_reshape.at(0)); + conv_11_class_pre << get_node_C_qasymm(graph, data_path, "BoxPredictor_0_CP", 273U, + PadStrideInfo(1U, 1U, 0U, 0U), class_pred_quant_info.at(0), + class_reshape.at(0)); SubStream conv_13_class_pre(conv_13); - conv_13_class_pre << get_node_C_qasymm(conv_13, data_path, "BoxPredictor_1_CP", 546U, PadStrideInfo(1U, 1U, 0U, 0U), class_pred_quant_info.at(1), class_reshape.at(1)); + conv_13_class_pre << get_node_C_qasymm(conv_13, data_path, "BoxPredictor_1_CP", 546U, + PadStrideInfo(1U, 1U, 0U, 0U), class_pred_quant_info.at(1), + class_reshape.at(1)); SubStream conv_14_2_class_pre(conv_14); - conv_14_2_class_pre << get_node_C_qasymm(conv_14, data_path, "BoxPredictor_2_CP", 546U, PadStrideInfo(1U, 1U, 0U, 0U), class_pred_quant_info.at(2), class_reshape.at(2)); + conv_14_2_class_pre << get_node_C_qasymm(conv_14, data_path, "BoxPredictor_2_CP", 546U, + PadStrideInfo(1U, 1U, 0U, 0U), class_pred_quant_info.at(2), + class_reshape.at(2)); SubStream conv_15_2_class_pre(conv_15); - conv_15_2_class_pre << get_node_C_qasymm(conv_15, data_path, "BoxPredictor_3_CP", 546U, PadStrideInfo(1U, 1U, 0U, 0U), class_pred_quant_info.at(3), class_reshape.at(3)); + conv_15_2_class_pre << get_node_C_qasymm(conv_15, data_path, "BoxPredictor_3_CP", 546U, + PadStrideInfo(1U, 1U, 0U, 0U), class_pred_quant_info.at(3), + class_reshape.at(3)); SubStream conv_16_2_class_pre(conv_16); - conv_16_2_class_pre << get_node_C_qasymm(conv_16, data_path, "BoxPredictor_4_CP", 546U, PadStrideInfo(1U, 1U, 0U, 0U), class_pred_quant_info.at(4), class_reshape.at(4)); + conv_16_2_class_pre << get_node_C_qasymm(conv_16, data_path, "BoxPredictor_4_CP", 546U, + PadStrideInfo(1U, 1U, 0U, 0U), class_pred_quant_info.at(4), + class_reshape.at(4)); SubStream conv_17_2_class_pre(conv_17); - conv_17_2_class_pre << get_node_C_qasymm(conv_17, data_path, "BoxPredictor_5_CP", 546U, PadStrideInfo(1U, 1U, 0U, 0U), class_pred_quant_info.at(5), class_reshape.at(5)); + conv_17_2_class_pre << get_node_C_qasymm(conv_17, data_path, "BoxPredictor_5_CP", 546U, + PadStrideInfo(1U, 1U, 0U, 0U), class_pred_quant_info.at(5), + class_reshape.at(5)); const QuantizationInfo cp_concate_qinfo = QuantizationInfo(0.0584389753639698f, 230); SubStream class_pred(graph); - class_pred << ConcatLayer( - arm_compute::graph::descriptors::ConcatLayerDescriptor(DataLayoutDimension::WIDTH, cp_concate_qinfo), - std::move(conv_11_class_pre), std::move(conv_13_class_pre), std::move(conv_14_2_class_pre), - std::move(conv_15_2_class_pre), std::move(conv_16_2_class_pre), std::move(conv_17_2_class_pre)) - .set_name("ClassPrediction/concat"); - - const QuantizationInfo logistic_out_qinfo = QuantizationInfo(0.00390625f, 0); - class_pred << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC), logistic_out_qinfo).set_name("ClassPrediction/logistic"); - - const int max_detections = 10; - const int max_classes_per_detection = 1; - const float nms_score_threshold = 0.30000001192092896f; - const float nms_iou_threshold = 0.6000000238418579f; - const int num_classes = 90; - const float x_scale = 10.f; - const float y_scale = 10.f; - const float h_scale = 5.f; - const float w_scale = 5.f; - std::array<float, 4> scales = { y_scale, x_scale, w_scale, h_scale }; - const QuantizationInfo anchors_qinfo = QuantizationInfo(0.006453060545027256f, 0); + class_pred << 
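// The six class-prediction heads (91 scores per anchor: 90 classes plus
// background) are concatenated and squashed with a LOGISTIC activation. The
// old code pinned the logistic output scale to 1/256 = 0.00390625 with a zero
// offset so that [0, 1) probabilities span the full QASYMM8 range; the new
// code derives the offset from the quantized data type's minimum value, which
// keeps the same mapping valid for QASYMM8_SIGNED.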
ConcatLayer(arm_compute::graph::descriptors::ConcatLayerDescriptor(DataLayoutDimension::WIDTH, + cp_concate_qinfo), + std::move(conv_11_class_pre), std::move(conv_13_class_pre), + std::move(conv_14_2_class_pre), std::move(conv_15_2_class_pre), + std::move(conv_16_2_class_pre), std::move(conv_17_2_class_pre)) + .set_name("ClassPrediction/concat"); + + const QuantizationInfo logistic_out_qinfo = QuantizationInfo( + 0.00390625f, quantization::get_min_max_values_from_quantized_data_type(common_params.data_type).first); + class_pred << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC), + logistic_out_qinfo) + .set_name("ClassPrediction/logistic"); + + const int max_detections = 10; + const int max_classes_per_detection = 1; + const float nms_score_threshold = 0.30000001192092896f; + const float nms_iou_threshold = 0.6000000238418579f; + const int num_classes = 90; + const float x_scale = 10.f; + const float y_scale = 10.f; + const float h_scale = 5.f; + const float w_scale = 5.f; + std::array<float, 4> scales = {y_scale, x_scale, w_scale, h_scale}; + const QuantizationInfo anchors_qinfo = QuantizationInfo(0.006453060545027256f, 0); SubStream detection_ouput(box_enc_pre); detection_ouput << DetectionPostProcessLayer(std::move(class_pred), - DetectionPostProcessLayerInfo(max_detections, max_classes_per_detection, nms_score_threshold, nms_iou_threshold, num_classes, scales), + DetectionPostProcessLayerInfo( + max_detections, max_classes_per_detection, nms_score_threshold, + nms_iou_threshold, num_classes, scales), get_weights_accessor(data_path, "anchors.npy"), anchors_qinfo) - .set_name("DetectionPostProcess"); + .set_name("DetectionPostProcess"); SubStream ouput_0(detection_ouput); - ouput_0 << OutputLayer(get_npy_output_accessor(detection_boxes_opt->value(), TensorShape(4U, 10U), DataType::F32), 0); + ouput_0 << OutputLayer( + get_npy_output_accessor(detection_boxes_opt->value(), TensorShape(4U, 10U), DataType::F32), 0); SubStream ouput_1(detection_ouput); - ouput_1 << OutputLayer(get_npy_output_accessor(detection_classes_opt->value(), TensorShape(10U), DataType::F32), 1); + ouput_1 << OutputLayer(get_npy_output_accessor(detection_classes_opt->value(), TensorShape(10U), DataType::F32), + 1); SubStream ouput_2(detection_ouput); - ouput_2 << OutputLayer(get_npy_output_accessor(detection_scores_opt->value(), TensorShape(10U), DataType::F32), 2); + ouput_2 << OutputLayer(get_npy_output_accessor(detection_scores_opt->value(), TensorShape(10U), DataType::F32), + 2); SubStream ouput_3(detection_ouput); ouput_3 << OutputLayer(get_npy_output_accessor(num_detections_opt->value(), TensorShape(1U), DataType::F32), 3); diff --git a/examples/graph_vgg16.cpp b/examples/graph_vgg16.cpp index f6996dadd5..72ac9694b1 100644 --- a/examples/graph_vgg16.cpp +++ b/examples/graph_vgg16.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. 
*/ #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -35,22 +36,11 @@ using namespace arm_compute::graph_utils; class GraphVGG16Example : public Example { public: - GraphVGG16Example() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "VGG16") + GraphVGG16Example() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "VGG16") { } bool do_setup(int argc, char **argv) override { - // Check if the system has enough RAM to run the example, systems with less than 2GB have - // to hint the API to minimize memory consumption otherwise it'll run out of memory and - // fail throwing the bad_alloc exception - arm_compute::MEMInfo meminfo; - const size_t mem_total = meminfo.get_total_in_kb(); - if(mem_total <= arm_compute::MEMInfo::TWO_GB_IN_KB) - { - arm_compute::MEMInfo::set_policy(arm_compute::MemoryPolicy::MINIMIZE); - } - // Parse arguments cmd_parser.parse(argc, argv); cmd_parser.validate(); @@ -59,7 +49,7 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; @@ -72,161 +62,153 @@ public: std::string data_path = common_params.data_path; // Create a preprocessor object - const std::array<float, 3> mean_rgb{ { 123.68f, 116.779f, 103.939f } }; - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb); + const std::array<float, 3> mean_rgb{{123.68f, 116.779f, 103.939f}}; + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<CaffePreproccessor>(mean_rgb); // Create input descriptor const auto operation_layout = common_params.data_layout; - const TensorShape tensor_shape = permute_shape(TensorShape(224U, 224U, 3U, 1U), DataLayout::NCHW, operation_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(224U, 224U, 3U, common_params.batches), DataLayout::NCHW, operation_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NCHW; // Create graph - graph << common_params.target - << common_params.fast_math_hint - << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor))) - // Layer 1 - << ConvolutionLayer( - 3U, 3U, 64U, - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_1_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv1_1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1_1/Relu") - // Layer 2 - << ConvolutionLayer( - 3U, 3U, 64U, - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_2_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_2_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv1_2") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1_2/Relu") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))).set_name("pool1") - // Layer 3 - << ConvolutionLayer( - 3U, 3U, 128U, - get_weights_accessor(data_path, 
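// get_weights_accessor streams each layer's weights from a NumPy .npy file
// under data_path, declaring the trained layout (NCHW) so the graph can
// convert them when it executes in NHWC. Note the setup changes above: the
// explicit MEMInfo / MemoryPolicy::MINIMIZE hint for systems with <= 2 GB of
// RAM is gone, and the input batch size now comes from common_params.batches
// instead of a hard-coded 1.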
"/cnn_data/vgg16_model/conv2_1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv2_1_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv2_1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv2_1/Relu") - // Layer 4 - << ConvolutionLayer( - 3U, 3U, 128U, - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv2_2_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv2_2_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv2_2") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv2_2/Relu") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))).set_name("pool2") - // Layer 5 - << ConvolutionLayer( - 3U, 3U, 256U, - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_1_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv3_1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv3_1/Relu") - // Layer 6 - << ConvolutionLayer( - 3U, 3U, 256U, - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_2_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_2_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv3_2") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv3_2/Relu") - // Layer 7 - << ConvolutionLayer( - 3U, 3U, 256U, - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_3_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_3_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv3_3") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv3_3/Relu") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))).set_name("pool3") - // Layer 8 - << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_1_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv4_1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv4_1/Relu") - // Layer 9 - << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_2_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_2_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv4_2") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv4_2/Relu") - // Layer 10 - << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_3_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_3_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv4_3") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv4_3/Relu") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))).set_name("pool4") - // Layer 11 - << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_1_b.npy"), - 
PadStrideInfo(1, 1, 1, 1)) - .set_name("conv5_1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv5_1/Relu") - // Layer 12 - << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_2_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_2_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv5_2") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv5_2/Relu") - // Layer 13 - << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_3_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_3_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv5_3") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv5_3/Relu") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))).set_name("pool5") - // Layer 14 - << FullyConnectedLayer( - 4096U, - get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc6_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc6_b.npy")) - .set_name("fc6") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Relu") - // Layer 15 - << FullyConnectedLayer( - 4096U, - get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc7_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc7_b.npy")) - .set_name("fc7") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Relu_1") - // Layer 16 - << FullyConnectedLayer( - 1000U, - get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc8_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc8_b.npy")) - .set_name("fc8") - // Softmax - << SoftmaxLayer().set_name("prob") - << OutputLayer(get_output_accessor(common_params, 5)); + graph + << common_params.target << common_params.fast_math_hint + << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor))) + // Layer 1 + << ConvolutionLayer( + 3U, 3U, 64U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_1_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv1_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv1_1/Relu") + // Layer 2 + << ConvolutionLayer( + 3U, 3U, 64U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_2_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv1_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv1_2/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))) + .set_name("pool1") + // Layer 3 + << ConvolutionLayer( + 3U, 3U, 128U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv2_1_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv2_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv2_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv2_1/Relu") + // Layer 4 + << ConvolutionLayer( + 3U, 3U, 128U, get_weights_accessor(data_path, 
"/cnn_data/vgg16_model/conv2_2_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv2_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv2_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv2_2/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))) + .set_name("pool2") + // Layer 5 + << ConvolutionLayer( + 3U, 3U, 256U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_1_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv3_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv3_1/Relu") + // Layer 6 + << ConvolutionLayer( + 3U, 3U, 256U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_2_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv3_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv3_2/Relu") + // Layer 7 + << ConvolutionLayer( + 3U, 3U, 256U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_3_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_3_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv3_3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv3_3/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))) + .set_name("pool3") + // Layer 8 + << ConvolutionLayer( + 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_1_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv4_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv4_1/Relu") + // Layer 9 + << ConvolutionLayer( + 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_2_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv4_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv4_2/Relu") + // Layer 10 + << ConvolutionLayer( + 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_3_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_3_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv4_3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv4_3/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))) + .set_name("pool4") + // Layer 11 + << ConvolutionLayer( + 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_1_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv5_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv5_1/Relu") + // Layer 12 + << ConvolutionLayer( + 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_2_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_2_b.npy"), 
PadStrideInfo(1, 1, 1, 1)) + .set_name("conv5_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv5_2/Relu") + // Layer 13 + << ConvolutionLayer( + 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_3_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_3_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv5_3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv5_3/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))) + .set_name("pool5") + // Layer 14 + << FullyConnectedLayer(4096U, + get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc6_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc6_b.npy")) + .set_name("fc6") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Relu") + // Layer 15 + << FullyConnectedLayer(4096U, + get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc7_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc7_b.npy")) + .set_name("fc7") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Relu_1") + // Layer 16 + << FullyConnectedLayer(1000U, + get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc8_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc8_b.npy")) + .set_name("fc8") + // Softmax + << SoftmaxLayer().set_name("prob") << OutputLayer(get_output_accessor(common_params, 5)); // Finalize graph GraphConfig config; - config.num_threads = common_params.threads; - config.use_tuner = common_params.enable_tuner; - config.tuner_mode = common_params.tuner_mode; - config.tuner_file = common_params.tuner_file; - config.convert_to_uint8 = (common_params.data_type == DataType::QASYMM8); + config.num_threads = common_params.threads; + config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; + config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; + config.use_synthetic_type = arm_compute::is_data_type_quantized(common_params.data_type); + config.synthetic_type = common_params.data_type; graph.finalize(common_params.target, config); diff --git a/examples/graph_vgg19.cpp b/examples/graph_vgg19.cpp index f9f5c213d5..9293544655 100644 --- a/examples/graph_vgg19.cpp +++ b/examples/graph_vgg19.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. 
*/ #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -34,22 +35,11 @@ using namespace arm_compute::graph_utils; class GraphVGG19Example : public Example { public: - GraphVGG19Example() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "VGG19") + GraphVGG19Example() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "VGG19") { } bool do_setup(int argc, char **argv) override { - // Check if the system has enough RAM to run the example, systems with less than 2GB have - // to hint the API to minimize memory consumption otherwise it'll run out of memory and - // fail throwing the bad_alloc exception - arm_compute::MEMInfo meminfo; - const size_t mem_total = meminfo.get_total_in_kb(); - if(mem_total <= arm_compute::MEMInfo::TWO_GB_IN_KB) - { - arm_compute::MEMInfo::set_policy(arm_compute::MemoryPolicy::MINIMIZE); - } - // Parse arguments cmd_parser.parse(argc, argv); cmd_parser.validate(); @@ -58,7 +48,7 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; @@ -71,173 +61,162 @@ public: std::string data_path = common_params.data_path; // Create a preprocessor object - const std::array<float, 3> mean_rgb{ { 123.68f, 116.779f, 103.939f } }; - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb); + const std::array<float, 3> mean_rgb{{123.68f, 116.779f, 103.939f}}; + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<CaffePreproccessor>(mean_rgb); // Create input descriptor const auto operation_layout = common_params.data_layout; - const TensorShape tensor_shape = permute_shape(TensorShape(224U, 224U, 3U, 1U), DataLayout::NCHW, operation_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(224U, 224U, 3U, common_params.batches), DataLayout::NCHW, operation_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NCHW; - graph << common_params.target - << common_params.fast_math_hint - << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor))) - // Layer 1 - << ConvolutionLayer( - 3U, 3U, 64U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv1_1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv1_1_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv1_1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1_1/Relu") - << ConvolutionLayer( - 3U, 3U, 64U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv1_2_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv1_2_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv1_2") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1_2/Relu") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))).set_name("pool1") - // Layer 2 - << ConvolutionLayer( - 3U, 3U, 128U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv2_1_w.npy", 
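// As in the VGG16 example, CaffePreproccessor subtracts the Caffe mean image
// (123.68, 116.779, 103.939) per RGB channel, and permute_shape converts the
// canonical NCHW 224x224x3 input shape to the data layout requested on the
// command line, with common_params.batches now setting the batch dimension.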
weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv2_1_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv2_1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv2_1/Relu") - << ConvolutionLayer( - 3U, 3U, 128U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv2_2_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv2_2_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv2_2") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv2_2/Relu") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))).set_name("pool2") - // Layer 3 - << ConvolutionLayer( - 3U, 3U, 256U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_1_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv3_1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv3_1/Relu") - << ConvolutionLayer( - 3U, 3U, 256U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_2_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_2_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv3_2") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv3_2/Relu") - << ConvolutionLayer( - 3U, 3U, 256U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_3_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_3_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv3_3") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv3_3/Relu") - << ConvolutionLayer( - 3U, 3U, 256U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_4_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_4_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv3_4") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv3_4/Relu") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))).set_name("pool3") - // Layer 4 - << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_1_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv4_1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv4_1/Relu") - << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_2_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_2_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv4_2") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv4_2/Relu") - << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_3_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_3_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv4_3") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv4_3/Relu") - << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, 
"/cnn_data/vgg19_model/conv4_4_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_4_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv4_4") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv4_4/Relu") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))).set_name("pool4") - // Layer 5 - << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_1_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv5_1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv5_1/Relu") - << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_2_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_2_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv5_2") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv5_2/Relu") - << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_3_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_3_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv5_3") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv5_3/Relu") - << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_4_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_4_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv5_4") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv5_4/Relu") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))).set_name("pool5") - // Layer 6 - << FullyConnectedLayer( - 4096U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc6_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc6_b.npy")) - .set_name("fc6") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Relu") - // Layer 7 - << FullyConnectedLayer( - 4096U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc7_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc7_b.npy")) - .set_name("fc7") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Relu_1") - // Layer 8 - << FullyConnectedLayer( - 1000U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc8_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc8_b.npy")) - .set_name("fc8") - // Softmax - << SoftmaxLayer().set_name("prob") - << OutputLayer(get_output_accessor(common_params, 5)); + graph + << common_params.target << common_params.fast_math_hint + << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor))) + // Layer 1 + << ConvolutionLayer( + 3U, 3U, 64U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv1_1_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv1_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv1_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv1_1/Relu") 
+ << ConvolutionLayer( + 3U, 3U, 64U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv1_2_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv1_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv1_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv1_2/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))) + .set_name("pool1") + // Layer 2 + << ConvolutionLayer( + 3U, 3U, 128U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv2_1_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv2_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv2_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv2_1/Relu") + << ConvolutionLayer( + 3U, 3U, 128U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv2_2_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv2_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv2_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv2_2/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))) + .set_name("pool2") + // Layer 3 + << ConvolutionLayer( + 3U, 3U, 256U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_1_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv3_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv3_1/Relu") + << ConvolutionLayer( + 3U, 3U, 256U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_2_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv3_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv3_2/Relu") + << ConvolutionLayer( + 3U, 3U, 256U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_3_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_3_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv3_3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv3_3/Relu") + << ConvolutionLayer( + 3U, 3U, 256U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_4_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_4_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv3_4") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv3_4/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))) + .set_name("pool3") + // Layer 4 + << ConvolutionLayer( + 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_1_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv4_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv4_1/Relu") + << ConvolutionLayer( + 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_2_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_2_b.npy"), 
PadStrideInfo(1, 1, 1, 1)) + .set_name("conv4_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv4_2/Relu") + << ConvolutionLayer( + 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_3_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_3_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv4_3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv4_3/Relu") + << ConvolutionLayer( + 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_4_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_4_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv4_4") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv4_4/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))) + .set_name("pool4") + // Layer 5 + << ConvolutionLayer( + 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_1_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv5_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv5_1/Relu") + << ConvolutionLayer( + 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_2_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv5_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv5_2/Relu") + << ConvolutionLayer( + 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_3_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_3_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv5_3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv5_3/Relu") + << ConvolutionLayer( + 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_4_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_4_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv5_4") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv5_4/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))) + .set_name("pool5") + // Layer 6 + << FullyConnectedLayer(4096U, + get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc6_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc6_b.npy")) + .set_name("fc6") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Relu") + // Layer 7 + << FullyConnectedLayer(4096U, + get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc7_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc7_b.npy")) + .set_name("fc7") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Relu_1") + // Layer 8 + << FullyConnectedLayer(1000U, + get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc8_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc8_b.npy")) + .set_name("fc8") + // Softmax + << 
SoftmaxLayer().set_name("prob") << OutputLayer(get_output_accessor(common_params, 5)); // Finalize graph GraphConfig config; - config.num_threads = common_params.threads; - config.use_tuner = common_params.enable_tuner; - config.tuner_mode = common_params.tuner_mode; - config.tuner_file = common_params.tuner_file; - config.convert_to_uint8 = (common_params.data_type == DataType::QASYMM8); + config.num_threads = common_params.threads; + config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; + config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; + config.use_synthetic_type = arm_compute::is_data_type_quantized(common_params.data_type); + config.synthetic_type = common_params.data_type; graph.finalize(common_params.target, config); diff --git a/examples/graph_vgg_vdsr.cpp b/examples/graph_vgg_vdsr.cpp index c308236f5b..a6cd337f82 100644 --- a/examples/graph_vgg_vdsr.cpp +++ b/examples/graph_vgg_vdsr.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. */ #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -36,8 +37,7 @@ using namespace arm_compute::graph_utils; class GraphVDSRExample : public Example { public: - GraphVDSRExample() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "VDSR") + GraphVDSRExample() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "VDSR") { model_input_width = cmd_parser.add_option<SimpleOption<unsigned int>>("image-width", 192); model_input_height = cmd_parser.add_option<SimpleOption<unsigned int>>("image-height", 192); @@ -46,7 +46,7 @@ public: model_input_width->set_help("Input image width."); model_input_height->set_help("Input image height."); } - GraphVDSRExample(const GraphVDSRExample &) = delete; + GraphVDSRExample(const GraphVDSRExample &) = delete; GraphVDSRExample &operator=(const GraphVDSRExample &) = delete; ~GraphVDSRExample() override = default; bool do_setup(int argc, char **argv) override @@ -59,7 +59,7 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; @@ -79,18 +79,20 @@ public: const std::string model_path = "/cnn_data/vdsr_model/"; // Create a preprocessor object - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<TFPreproccessor>(); + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<TFPreproccessor>(); // Create input descriptor - const TensorShape tensor_shape = permute_shape(TensorShape(image_width, image_height, 1U, 1U), DataLayout::NCHW, common_params.data_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(image_width, image_height, 1U, common_params.batches), DataLayout::NCHW, + common_params.data_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NCHW; // Note: Quantization info are random and used only for benchmarking purposes - graph << common_params.target - << common_params.fast_math_hint + 
graph << common_params.target << common_params.fast_math_hint << InputLayer(input_descriptor.set_quantization_info(QuantizationInfo(0.0078125f, 128)), get_input_accessor(common_params, std::move(preprocessor), false)); @@ -98,49 +100,48 @@ public: SubStream right(graph); // Layer 1 - right << ConvolutionLayer( - 3U, 3U, 64U, - get_weights_accessor(data_path, "conv0_w.npy", weights_layout), - get_weights_accessor(data_path, "conv0_b.npy"), - PadStrideInfo(1, 1, 1, 1), 1, QuantizationInfo(0.031778190285f, 156), QuantizationInfo(0.0784313753247f, 128)) - .set_name("conv0") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv0/Relu"); + right << ConvolutionLayer(3U, 3U, 64U, get_weights_accessor(data_path, "conv0_w.npy", weights_layout), + get_weights_accessor(data_path, "conv0_b.npy"), PadStrideInfo(1, 1, 1, 1), 1, + QuantizationInfo(0.031778190285f, 156), QuantizationInfo(0.0784313753247f, 128)) + .set_name("conv0") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv0/Relu"); // Rest 17 layers - for(unsigned int i = 1; i < 19; ++i) + for (unsigned int i = 1; i < 19; ++i) { const std::string conv_w_path = "conv" + arm_compute::support::cpp11::to_string(i) + "_w.npy"; const std::string conv_b_path = "conv" + arm_compute::support::cpp11::to_string(i) + "_b.npy"; const std::string conv_name = "conv" + arm_compute::support::cpp11::to_string(i); - right << ConvolutionLayer( - 3U, 3U, 64U, - get_weights_accessor(data_path, conv_w_path, weights_layout), - get_weights_accessor(data_path, conv_b_path), - PadStrideInfo(1, 1, 1, 1), 1, QuantizationInfo(0.015851572156f, 93)) - .set_name(conv_name) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(conv_name + "/Relu"); + right << ConvolutionLayer(3U, 3U, 64U, get_weights_accessor(data_path, conv_w_path, weights_layout), + get_weights_accessor(data_path, conv_b_path), PadStrideInfo(1, 1, 1, 1), 1, + QuantizationInfo(0.015851572156f, 93)) + .set_name(conv_name) + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(conv_name + "/Relu"); } // Final layer - right << ConvolutionLayer( - 3U, 3U, 1U, - get_weights_accessor(data_path, "conv20_w.npy", weights_layout), - get_weights_accessor(data_path, "conv20_b.npy"), - PadStrideInfo(1, 1, 1, 1), 1, QuantizationInfo(0.015851572156f, 93)) - .set_name("conv20") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv20/Relu"); + right << ConvolutionLayer(3U, 3U, 1U, get_weights_accessor(data_path, "conv20_w.npy", weights_layout), + get_weights_accessor(data_path, "conv20_b.npy"), PadStrideInfo(1, 1, 1, 1), 1, + QuantizationInfo(0.015851572156f, 93)) + .set_name("conv20") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv20/Relu"); // Add residual to input graph << EltwiseLayer(std::move(left), std::move(right), EltwiseOperation::Add).set_name("add") - << OutputLayer(arm_compute::support::cpp14::make_unique<DummyAccessor>(0)); + << OutputLayer(std::make_unique<DummyAccessor>(0)); // Finalize graph GraphConfig config; - config.num_threads = common_params.threads; - config.use_tuner = common_params.enable_tuner; - config.tuner_mode = common_params.tuner_mode; - config.tuner_file = common_params.tuner_file; - config.convert_to_uint8 = (common_params.data_type == DataType::QASYMM8); + config.num_threads = 
common_params.threads; + config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; + config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; + config.use_synthetic_type = arm_compute::is_data_type_quantized(common_params.data_type); + config.synthetic_type = common_params.data_type; graph.finalize(common_params.target, config); @@ -155,8 +156,8 @@ public: private: CommandLineParser cmd_parser; CommonGraphOptions common_opts; - SimpleOption<unsigned int> *model_input_width{ nullptr }; - SimpleOption<unsigned int> *model_input_height{ nullptr }; + SimpleOption<unsigned int> *model_input_width{nullptr}; + SimpleOption<unsigned int> *model_input_height{nullptr}; CommonGraphParams common_params; Stream graph; }; diff --git a/examples/graph_yolov3.cpp b/examples/graph_yolov3.cpp index bbc6b729d1..5c8d3426ec 100644 --- a/examples/graph_yolov3.cpp +++ b/examples/graph_yolov3.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. */ #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -35,8 +36,7 @@ using namespace arm_compute::graph_utils; class GraphYOLOv3Example : public Example { public: - GraphYOLOv3Example() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "YOLOv3") + GraphYOLOv3Example() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "YOLOv3") { } @@ -50,14 +50,15 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; } // Checks - ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "QASYMM8 not supported for this graph"); + ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), + "QASYMM8 not supported for this graph"); // Print parameter values std::cout << common_params << std::endl; @@ -66,334 +67,325 @@ public: std::string data_path = common_params.data_path; // Create a preprocessor object - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<TFPreproccessor>(0.f); + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<TFPreproccessor>(0.f); // Create input descriptor - const TensorShape tensor_shape = permute_shape(TensorShape(608U, 608U, 3U, 1U), DataLayout::NCHW, common_params.data_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(608U, 608U, 3U, 1U), DataLayout::NCHW, common_params.data_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NCHW; - graph << common_params.target - << common_params.fast_math_hint + graph << common_params.target << common_params.fast_math_hint << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), false)); std::pair<SubStream, SubStream> intermediate_layers = darknet53(data_path, weights_layout); - graph << ConvolutionLayer( - 1U, 1U, 512U, - get_weights_accessor(data_path, 
"/cnn_data/yolov3_model/conv2d_53_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv2d_53") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_53_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_53_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_53_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_53_beta.npy"), - 0.000001f) - .set_name("conv2d_53/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_53/LeakyRelu") - << ConvolutionLayer( - 3U, 3U, 1024U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_54_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv2d_54") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_54_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_54_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_54_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_54_beta.npy"), - 0.000001f) - .set_name("conv2d_54/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_54/LeakyRelu") - << ConvolutionLayer( - 1U, 1U, 512U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_55_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv2d_55") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_55_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_55_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_55_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_55_beta.npy"), - 0.000001f) - .set_name("conv2d_55/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_55/LeakyRelu") - << ConvolutionLayer( - 3U, 3U, 1024U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_56_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv2d_56") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_56_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_56_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_56_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_56_beta.npy"), - 0.000001f) - .set_name("conv2d_56/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_56/LeakyRelu") - << ConvolutionLayer( - 1U, 1U, 512U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_57_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv2d_57") - << BatchNormalizationLayer( - 
get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_57_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_57_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_57_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_57_beta.npy"), - 0.000001f) - .set_name("conv2d_57/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_57/LeakyRelu"); + graph + << ConvolutionLayer( + 1U, 1U, 512U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_53_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("conv2d_53") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_53_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_53_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_53_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_53_beta.npy"), 0.000001f) + .set_name("conv2d_53/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_53/LeakyRelu") + << ConvolutionLayer( + 3U, 3U, 1024U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_54_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv2d_54") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_54_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_54_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_54_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_54_beta.npy"), 0.000001f) + .set_name("conv2d_54/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_54/LeakyRelu") + << ConvolutionLayer( + 1U, 1U, 512U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_55_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("conv2d_55") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_55_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_55_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_55_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_55_beta.npy"), 0.000001f) + .set_name("conv2d_55/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_55/LeakyRelu") + << ConvolutionLayer( + 3U, 3U, 1024U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_56_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv2d_56") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_56_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_56_var.npy"), + 
get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_56_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_56_beta.npy"), 0.000001f) + .set_name("conv2d_56/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_56/LeakyRelu") + << ConvolutionLayer( + 1U, 1U, 512U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_57_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("conv2d_57") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_57_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_57_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_57_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_57_beta.npy"), 0.000001f) + .set_name("conv2d_57/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_57/LeakyRelu"); SubStream route_1(graph); - graph << ConvolutionLayer( - 3U, 3U, 1024U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_58_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv2d_58") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_58_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_58_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_58_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_58_beta.npy"), - 0.000001f) - .set_name("conv2d_58/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_58/LeakyRelu") - << ConvolutionLayer( - 1U, 1U, 255U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_59_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_59_b.npy", weights_layout), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv2d_59") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 1.f)).set_name("conv2d_59/Linear") - << YOLOLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.1f), 80).set_name("Yolo1") - << OutputLayer(get_output_accessor(common_params, 5)); + graph + << ConvolutionLayer( + 3U, 3U, 1024U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_58_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv2d_58") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_58_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_58_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_58_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_58_beta.npy"), 0.000001f) + .set_name("conv2d_58/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_58/LeakyRelu") + << ConvolutionLayer( + 1U, 1U, 255U, + 
get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_59_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_59_b.npy", weights_layout), + PadStrideInfo(1, 1, 0, 0)) + .set_name("conv2d_59") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 1.f)) + .set_name("conv2d_59/Linear") + << YOLOLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.1f)).set_name("Yolo1") + << OutputLayer(get_output_accessor(common_params, 5)); route_1 << ConvolutionLayer( - 1U, 1U, 256U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_60_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv2d_60") + 1U, 1U, 256U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_60_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("conv2d_60") << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_59_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_59_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_59_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_59_beta.npy"), - 0.000001f) - .set_name("conv2d_59/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_60/LeakyRelu") - << UpsampleLayer(Size2D(2, 2), InterpolationPolicy::NEAREST_NEIGHBOR).set_name("Upsample_60"); + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_59_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_59_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_59_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_59_beta.npy"), + 0.000001f) + .set_name("conv2d_59/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_60/LeakyRelu") + << ResizeLayer(InterpolationPolicy::NEAREST_NEIGHBOR, 2, 2).set_name("Upsample_60"); SubStream concat_1(route_1); - concat_1 << ConcatLayer(std::move(route_1), std::move(intermediate_layers.second)).set_name("Route1") - << ConvolutionLayer( - 1U, 1U, 256U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_61_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv2d_61") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_60_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_60_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_60_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_60_beta.npy"), - 0.000001f) - .set_name("conv2d_60/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_61/LeakyRelu") - << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_62_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - 
.set_name("conv2d_62") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_61_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_61_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_61_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_61_beta.npy"), - 0.000001f) - .set_name("conv2d_61/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_62/LeakyRelu") - << ConvolutionLayer( - 1U, 1U, 256U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_63_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv2d_63") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_62_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_62_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_62_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_62_beta.npy"), - 0.000001f) - .set_name("conv2d_62/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_63/LeakyRelu") - << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_64_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv2d_64") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_63_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_63_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_63_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_63_beta.npy"), - 0.000001f) - .set_name("conv2d_63/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_64/LeakyRelu") - << ConvolutionLayer( - 1U, 1U, 256U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_65_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv2d_65") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_64_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_64_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_64_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_64_beta.npy"), - 0.000001f) - .set_name("conv2d_65/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_65/LeakyRelu"); + concat_1 + << ConcatLayer(std::move(route_1), std::move(intermediate_layers.second)).set_name("Route1") + << ConvolutionLayer( + 1U, 1U, 256U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_61_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("conv2d_61") + << BatchNormalizationLayer( + get_weights_accessor(data_path, 
"/cnn_data/yolov3_model/batch_normalization_60_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_60_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_60_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_60_beta.npy"), 0.000001f) + .set_name("conv2d_60/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_61/LeakyRelu") + << ConvolutionLayer( + 3U, 3U, 512U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_62_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv2d_62") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_61_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_61_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_61_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_61_beta.npy"), 0.000001f) + .set_name("conv2d_61/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_62/LeakyRelu") + << ConvolutionLayer( + 1U, 1U, 256U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_63_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("conv2d_63") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_62_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_62_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_62_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_62_beta.npy"), 0.000001f) + .set_name("conv2d_62/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_63/LeakyRelu") + << ConvolutionLayer( + 3U, 3U, 512U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_64_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv2d_64") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_63_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_63_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_63_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_63_beta.npy"), 0.000001f) + .set_name("conv2d_63/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_64/LeakyRelu") + << ConvolutionLayer( + 1U, 1U, 256U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_65_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("conv2d_65") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_64_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_64_var.npy"), + get_weights_accessor(data_path, 
"/cnn_data/yolov3_model/batch_normalization_64_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_64_beta.npy"), 0.000001f) + .set_name("conv2d_65/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_65/LeakyRelu"); SubStream route_2(concat_1); - concat_1 << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_66_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv2d_66") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_65_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_65_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_65_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_65_beta.npy"), - 0.000001f) - .set_name("conv2d_65/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_66/LeakyRelu") - << ConvolutionLayer( - 1U, 1U, 255U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_67_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_67_b.npy", weights_layout), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv2d_67") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 1.f)).set_name("conv2d_67/Linear") - << YOLOLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.1f), 80).set_name("Yolo2") - << OutputLayer(get_output_accessor(common_params, 5)); + concat_1 + << ConvolutionLayer( + 3U, 3U, 512U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_66_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv2d_66") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_65_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_65_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_65_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_65_beta.npy"), 0.000001f) + .set_name("conv2d_65/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_66/LeakyRelu") + << ConvolutionLayer( + 1U, 1U, 255U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_67_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_67_b.npy", weights_layout), + PadStrideInfo(1, 1, 0, 0)) + .set_name("conv2d_67") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 1.f)) + .set_name("conv2d_67/Linear") + << YOLOLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.1f)).set_name("Yolo2") + << OutputLayer(get_output_accessor(common_params, 5)); route_2 << ConvolutionLayer( - 1U, 1U, 128U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_68_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv2d_68") + 1U, 1U, 128U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_68_w.npy", 
weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("conv2d_68") << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_66_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_66_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_66_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_66_beta.npy"), - 0.000001f) - .set_name("conv2d_66/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_68/LeakyRelu") - << UpsampleLayer(Size2D(2, 2), InterpolationPolicy::NEAREST_NEIGHBOR).set_name("Upsample_68"); + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_66_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_66_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_66_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_66_beta.npy"), + 0.000001f) + .set_name("conv2d_66/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_68/LeakyRelu") + << ResizeLayer(InterpolationPolicy::NEAREST_NEIGHBOR, 2, 2).set_name("Upsample_68"); SubStream concat_2(route_2); - concat_2 << ConcatLayer(std::move(route_2), std::move(intermediate_layers.first)).set_name("Route2") - << ConvolutionLayer( - 1U, 1U, 128U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_69_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv2d_69") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_67_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_67_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_67_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_67_beta.npy"), - 0.000001f) - .set_name("conv2d_67/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_69/LeakyRelu") - << ConvolutionLayer( - 3U, 3U, 256U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_70_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv2d_70") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_68_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_68_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_68_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_68_beta.npy"), - 0.000001f) - .set_name("conv2d_68/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_70/LeakyRelu") - << ConvolutionLayer( - 1U, 1U, 128U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_71_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv2d_71") - << BatchNormalizationLayer( - 
get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_69_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_69_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_69_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_69_beta.npy"), - 0.000001f) - .set_name("conv2d_69/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_71/LeakyRelu") - << ConvolutionLayer( - 3U, 3U, 256U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_72_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv2d_72") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_70_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_70_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_70_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_70_beta.npy"), - 0.000001f) - .set_name("conv2d_70/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_72/LeakyRelu") - << ConvolutionLayer( - 1U, 1U, 128U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_73_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv2d_73") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_71_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_71_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_71_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_71_beta.npy"), - 0.000001f) - .set_name("conv2d_71/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_73/LeakyRelu") - << ConvolutionLayer( - 3U, 3U, 256U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_74_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv2d_74") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_72_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_72_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_72_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_72_beta.npy"), - 0.000001f) - .set_name("conv2d_72/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_74/LeakyRelu") - << ConvolutionLayer( - 1U, 1U, 255U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_75_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_75_b.npy", weights_layout), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv2d_75") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 1.f)).set_name("conv2d_75/Linear") - << YOLOLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.1f), 
80).set_name("Yolo3") - << OutputLayer(get_output_accessor(common_params, 5)); + concat_2 + << ConcatLayer(std::move(route_2), std::move(intermediate_layers.first)).set_name("Route2") + << ConvolutionLayer( + 1U, 1U, 128U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_69_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("conv2d_69") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_67_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_67_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_67_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_67_beta.npy"), 0.000001f) + .set_name("conv2d_67/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_69/LeakyRelu") + << ConvolutionLayer( + 3U, 3U, 256U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_70_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv2d_70") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_68_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_68_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_68_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_68_beta.npy"), 0.000001f) + .set_name("conv2d_68/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_70/LeakyRelu") + << ConvolutionLayer( + 1U, 1U, 128U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_71_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("conv2d_71") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_69_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_69_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_69_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_69_beta.npy"), 0.000001f) + .set_name("conv2d_69/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_71/LeakyRelu") + << ConvolutionLayer( + 3U, 3U, 256U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_72_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv2d_72") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_70_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_70_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_70_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_70_beta.npy"), 0.000001f) + .set_name("conv2d_70/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_72/LeakyRelu") + << ConvolutionLayer( + 1U, 1U, 128U, 
+ get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_73_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("conv2d_73") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_71_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_71_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_71_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_71_beta.npy"), 0.000001f) + .set_name("conv2d_71/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_73/LeakyRelu") + << ConvolutionLayer( + 3U, 3U, 256U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_74_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv2d_74") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_72_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_72_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_72_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_72_beta.npy"), 0.000001f) + .set_name("conv2d_72/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_74/LeakyRelu") + << ConvolutionLayer( + 1U, 1U, 255U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_75_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_75_b.npy", weights_layout), + PadStrideInfo(1, 1, 0, 0)) + .set_name("conv2d_75") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 1.f)) + .set_name("conv2d_75/Linear") + << YOLOLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.1f)).set_name("Yolo3") + << OutputLayer(get_output_accessor(common_params, 5)); // Finalize graph GraphConfig config; @@ -401,6 +393,7 @@ public: config.use_tuner = common_params.enable_tuner; config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; graph.finalize(common_params.target, config); @@ -421,64 +414,64 @@ private: std::pair<SubStream, SubStream> darknet53(const std::string &data_path, DataLayout weights_layout) { graph << ConvolutionLayer( - 3U, 3U, 32U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_1_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv2d_1/Conv2D") + 3U, 3U, 32U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_1_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv2d_1/Conv2D") << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_1_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_1_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_1_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_1_beta.npy"), - 0.000001f) - 
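Throughout darknet53 the convolutions pass an empty std::unique_ptr<ITensorAccessor> where a bias accessor would go: the convolution is bias-free because a BatchNormalizationLayer follows immediately, and LeakyReLU(0.1) closes each unit. A sketch of that recurring conv/batch-norm/leaky-ReLU unit; add_conv_bn_leaky is a hypothetical helper, and the conv2d_1 hunk resumes below:

    #include <memory>
    #include <string>
    #include "arm_compute/graph.h"
    #include "utils/GraphUtils.h"

    using namespace arm_compute;
    using namespace arm_compute::graph::frontend;
    using namespace arm_compute::graph_utils;

    void add_conv_bn_leaky(Stream &graph, const std::string &data_path, const std::string &id,
                           unsigned int ofm, PadStrideInfo info, DataLayout weights_layout)
    {
        const std::string total_path = "/cnn_data/yolov3_model/";
        graph << ConvolutionLayer(3U, 3U, ofm,
                                  get_weights_accessor(data_path, total_path + "conv2d_" + id + "_w.npy", weights_layout),
                                  std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), // no bias accessor
                                  info)
                     .set_name("conv2d_" + id + "/Conv2D")
              << BatchNormalizationLayer(
                     get_weights_accessor(data_path, total_path + "batch_normalization_" + id + "_mean.npy"),
                     get_weights_accessor(data_path, total_path + "batch_normalization_" + id + "_var.npy"),
                     get_weights_accessor(data_path, total_path + "batch_normalization_" + id + "_gamma.npy"),
                     get_weights_accessor(data_path, total_path + "batch_normalization_" + id + "_beta.npy"),
                     0.000001f)
                     .set_name("conv2d_" + id + "/BatchNorm")
              << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f))
                     .set_name("conv2d_" + id + "/LeakyRelu");
    }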
.set_name("conv2d_1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_1/LeakyRelu") + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_1_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_1_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_1_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_1_beta.npy"), + 0.000001f) + .set_name("conv2d_1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_1/LeakyRelu") << ConvolutionLayer( - 3U, 3U, 64U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_2_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 1, 1)) - .set_name("conv2d_2/Conv2D") + 3U, 3U, 64U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_2_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 1, 1)) + .set_name("conv2d_2/Conv2D") << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_2_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_2_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_2_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_2_beta.npy"), - 0.000001f) - .set_name("conv2d_2/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_2/LeakyRelu"); + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_2_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_2_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_2_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_2_beta.npy"), + 0.000001f) + .set_name("conv2d_2/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_2/LeakyRelu"); darknet53_block(data_path, "3", weights_layout, 32U); graph << ConvolutionLayer( - 3U, 3U, 128U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_5_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 1, 1)) - .set_name("conv2d_5/Conv2D") + 3U, 3U, 128U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_5_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 1, 1)) + .set_name("conv2d_5/Conv2D") << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_5_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_5_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_5_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_5_beta.npy"), - 0.000001f) - .set_name("conv2d_5/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_5/LeakyRelu"); + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_5_mean.npy"), + 
get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_5_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_5_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_5_beta.npy"), + 0.000001f) + .set_name("conv2d_5/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_5/LeakyRelu"); darknet53_block(data_path, "6", weights_layout, 64U); darknet53_block(data_path, "8", weights_layout, 64U); graph << ConvolutionLayer( - 3U, 3U, 256U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_10_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 1, 1)) - .set_name("conv2d_10/Conv2D") + 3U, 3U, 256U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_10_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 1, 1)) + .set_name("conv2d_10/Conv2D") << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_10_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_10_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_10_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_10_beta.npy"), - 0.000001f) - .set_name("conv2d_10/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_10/LeakyRelu"); + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_10_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_10_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_10_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_10_beta.npy"), + 0.000001f) + .set_name("conv2d_10/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_10/LeakyRelu"); darknet53_block(data_path, "11", weights_layout, 128U); darknet53_block(data_path, "13", weights_layout, 128U); darknet53_block(data_path, "15", weights_layout, 128U); @@ -489,19 +482,19 @@ private: darknet53_block(data_path, "25", weights_layout, 128U); SubStream layer_36(graph); graph << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_27_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 1, 1)) - .set_name("conv2d_27/Conv2D") + 3U, 3U, 512U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_27_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 1, 1)) + .set_name("conv2d_27/Conv2D") << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_27_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_27_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_27_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_27_beta.npy"), - 0.000001f) - .set_name("conv2d_27/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 
0.1f)).set_name("conv2d_27/LeakyRelu"); + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_27_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_27_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_27_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_27_beta.npy"), + 0.000001f) + .set_name("conv2d_27/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_27/LeakyRelu"); darknet53_block(data_path, "28", weights_layout, 256U); darknet53_block(data_path, "30", weights_layout, 256U); darknet53_block(data_path, "32", weights_layout, 256U); @@ -512,19 +505,19 @@ private: darknet53_block(data_path, "42", weights_layout, 256U); SubStream layer_61(graph); graph << ConvolutionLayer( - 3U, 3U, 1024U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_44_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 1, 1)) - .set_name("conv2d_44/Conv2D") + 3U, 3U, 1024U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_44_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 1, 1)) + .set_name("conv2d_44/Conv2D") << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_44_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_44_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_44_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_44_beta.npy"), - 0.000001f) - .set_name("conv2d_44/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_44/LeakyRelu"); + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_44_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_44_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_44_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_44_beta.npy"), + 0.000001f) + .set_name("conv2d_44/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_44/LeakyRelu"); darknet53_block(data_path, "45", weights_layout, 512U); darknet53_block(data_path, "47", weights_layout, 512U); darknet53_block(data_path, "49", weights_layout, 512U); @@ -533,43 +526,48 @@ private: return std::pair<SubStream, SubStream>(layer_36, layer_61); } - void darknet53_block(const std::string &data_path, std::string &¶m_path, DataLayout weights_layout, - unsigned int filter_size) + void darknet53_block(const std::string &data_path, + std::string &¶m_path, + DataLayout weights_layout, + unsigned int filter_size) { - std::string total_path = "/cnn_data/yolov3_model/"; - std::string param_path2 = arm_compute::support::cpp11::to_string(arm_compute::support::cpp11::stoi(param_path) + 1); - SubStream i_a(graph); - SubStream i_b(graph); + std::string total_path = "/cnn_data/yolov3_model/"; + std::string param_path2 = + arm_compute::support::cpp11::to_string(arm_compute::support::cpp11::stoi(param_path) + 1); + SubStream i_a(graph); + SubStream i_b(graph); i_a << ConvolutionLayer( - 1U, 1U, filter_size, - 
get_weights_accessor(data_path, total_path + "conv2d_" + param_path + "_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv2d_" + param_path + "/Conv2D") + 1U, 1U, filter_size, + get_weights_accessor(data_path, total_path + "conv2d_" + param_path + "_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("conv2d_" + param_path + "/Conv2D") << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "batch_normalization_" + param_path + "_mean.npy"), - get_weights_accessor(data_path, total_path + "batch_normalization_" + param_path + "_var.npy"), - get_weights_accessor(data_path, total_path + "batch_normalization_" + param_path + "_gamma.npy"), - get_weights_accessor(data_path, total_path + "batch_normalization_" + param_path + "_beta.npy"), - 0.000001f) - .set_name("conv2d_" + param_path + "/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_" + param_path + "/LeakyRelu") + get_weights_accessor(data_path, total_path + "batch_normalization_" + param_path + "_mean.npy"), + get_weights_accessor(data_path, total_path + "batch_normalization_" + param_path + "_var.npy"), + get_weights_accessor(data_path, total_path + "batch_normalization_" + param_path + "_gamma.npy"), + get_weights_accessor(data_path, total_path + "batch_normalization_" + param_path + "_beta.npy"), + 0.000001f) + .set_name("conv2d_" + param_path + "/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_" + param_path + "/LeakyRelu") << ConvolutionLayer( - 3U, 3U, filter_size * 2, - get_weights_accessor(data_path, total_path + "conv2d_" + param_path2 + "_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv2d_" + param_path2 + "/Conv2D") + 3U, 3U, filter_size * 2, + get_weights_accessor(data_path, total_path + "conv2d_" + param_path2 + "_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv2d_" + param_path2 + "/Conv2D") << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "batch_normalization_" + param_path2 + "_mean.npy"), - get_weights_accessor(data_path, total_path + "batch_normalization_" + param_path2 + "_var.npy"), - get_weights_accessor(data_path, total_path + "batch_normalization_" + param_path2 + "_gamma.npy"), - get_weights_accessor(data_path, total_path + "batch_normalization_" + param_path2 + "_beta.npy"), - 0.000001f) - .set_name("conv2d_" + param_path2 + "/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_" + param_path2 + "/LeakyRelu"); + get_weights_accessor(data_path, total_path + "batch_normalization_" + param_path2 + "_mean.npy"), + get_weights_accessor(data_path, total_path + "batch_normalization_" + param_path2 + "_var.npy"), + get_weights_accessor(data_path, total_path + "batch_normalization_" + param_path2 + "_gamma.npy"), + get_weights_accessor(data_path, total_path + "batch_normalization_" + param_path2 + "_beta.npy"), + 0.000001f) + .set_name("conv2d_" + param_path2 + "/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_" + param_path2 
+ "/LeakyRelu"); - graph << EltwiseLayer(std::move(i_a), std::move(i_b), EltwiseOperation::Add).set_name("").set_name("add_" + param_path + "_" + param_path2); + graph << EltwiseLayer(std::move(i_a), std::move(i_b), EltwiseOperation::Add) + .set_name("") + .set_name("add_" + param_path + "_" + param_path2); } }; diff --git a/examples/neon_cartoon_effect.cpp b/examples/neon_cartoon_effect.cpp deleted file mode 100644 index 4285aa41e3..0000000000 --- a/examples/neon_cartoon_effect.cpp +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright (c) 2017-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/NEON/NEFunctions.h" - -#include "arm_compute/core/Types.h" -#include "utils/ImageLoader.h" -#include "utils/Utils.h" - -using namespace arm_compute; -using namespace utils; - -class NEONCartoonEffectExample : public Example -{ -public: - bool do_setup(int argc, char **argv) override - { - // Open PPM file - PPMLoader ppm; - - if(argc < 2) - { - // Print help - std::cout << "Usage: ./build/neon_cartoon_effect [input_image.ppm]\n\n"; - std::cout << "No input_image provided, creating a dummy 640x480 image\n"; - // Create an empty grayscale 640x480 image - src_img.allocator()->init(TensorInfo(640, 480, Format::U8)); - } - else - { - ppm.open(argv[1]); - ppm.init_image(src_img, Format::U8); - } - - // Initialize just the dimensions and format of the images: - gaus5x5_img.allocator()->init(*src_img.info()); - canny_edge_img.allocator()->init(*src_img.info()); - dst_img.allocator()->init(*src_img.info()); - - // Configure the functions to call - gaus5x5.configure(&src_img, &gaus5x5_img, BorderMode::REPLICATE); - canny_edge.configure(&src_img, &canny_edge_img, 100, 80, 3, 1, BorderMode::REPLICATE); - sub.configure(&gaus5x5_img, &canny_edge_img, &dst_img, ConvertPolicy::SATURATE); - - // Now that the padding requirements are known we can allocate the images: - src_img.allocator()->allocate(); - dst_img.allocator()->allocate(); - gaus5x5_img.allocator()->allocate(); - canny_edge_img.allocator()->allocate(); - - // Fill the input image with the content of the PPM image if a filename was provided: - if(ppm.is_open()) - { - ppm.fill_image(src_img); - output_filename = std::string(argv[1]) + "_out.ppm"; - } - - return true; - } - - void do_run() override - { - // Execute the functions: - gaus5x5.run(); - canny_edge.run(); - sub.run(); - } - - void do_teardown() override - { - // Save the result to file: 
- if(!output_filename.empty()) - { - save_to_ppm(dst_img, output_filename); // save_to_ppm maps and unmaps the image to store as PPM - } - } - -private: - Image src_img{}, dst_img{}, gaus5x5_img{}, canny_edge_img{}; - NEGaussian5x5 gaus5x5{}; - NECannyEdge canny_edge{}; - NEArithmeticSubtraction sub{}; - std::string output_filename{}; -}; - -/** Main program for cartoon effect test - * - * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Path to PPM image to process ) - */ -int main(int argc, char **argv) -{ - return utils::run_example<NEONCartoonEffectExample>(argc, argv); -} diff --git a/examples/neon_cnn.cpp b/examples/neon_cnn.cpp index ee6f46d28b..1f7a1ea6ca 100644 --- a/examples/neon_cnn.cpp +++ b/examples/neon_cnn.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 ARM Limited. + * Copyright (c) 2016-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,13 +21,13 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/runtime/NEON/NEFunctions.h" - #include "arm_compute/core/Types.h" #include "arm_compute/runtime/Allocator.h" #include "arm_compute/runtime/BlobLifetimeManager.h" #include "arm_compute/runtime/MemoryManagerOnDemand.h" +#include "arm_compute/runtime/NEON/NEFunctions.h" #include "arm_compute/runtime/PoolManager.h" + #include "utils/Utils.h" using namespace arm_compute; @@ -43,20 +43,21 @@ public: // Create memory manager components // We need 2 memory managers: 1 for handling the tensors within the functions (mm_layers) and 1 for handling the input and output tensors of the functions (mm_transitions)) - auto lifetime_mgr0 = std::make_shared<BlobLifetimeManager>(); // Create lifetime manager - auto lifetime_mgr1 = std::make_shared<BlobLifetimeManager>(); // Create lifetime manager - auto pool_mgr0 = std::make_shared<PoolManager>(); // Create pool manager - auto pool_mgr1 = std::make_shared<PoolManager>(); // Create pool manager - auto mm_layers = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr0, pool_mgr0); // Create the memory manager - auto mm_transitions = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr1, pool_mgr1); // Create the memory manager + auto lifetime_mgr0 = std::make_shared<BlobLifetimeManager>(); // Create lifetime manager + auto lifetime_mgr1 = std::make_shared<BlobLifetimeManager>(); // Create lifetime manager + auto pool_mgr0 = std::make_shared<PoolManager>(); // Create pool manager + auto pool_mgr1 = std::make_shared<PoolManager>(); // Create pool manager + auto mm_layers = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr0, pool_mgr0); // Create the memory manager + auto mm_transitions = + std::make_shared<MemoryManagerOnDemand>(lifetime_mgr1, pool_mgr1); // Create the memory manager // The weights and biases tensors should be initialized with the values inferred with the training // Set memory manager where allowed to manage internal memory requirements - conv0 = arm_compute::support::cpp14::make_unique<NEConvolutionLayer>(mm_layers); - conv1 = arm_compute::support::cpp14::make_unique<NEConvolutionLayer>(mm_layers); - fc0 = arm_compute::support::cpp14::make_unique<NEFullyConnectedLayer>(mm_layers); - softmax = arm_compute::support::cpp14::make_unique<NESoftmaxLayer>(mm_layers); + conv0 = std::make_unique<NEConvolutionLayer>(mm_layers); + conv1 = std::make_unique<NEConvolutionLayer>(mm_layers); + fc0 = std::make_unique<NEFullyConnectedLayer>(mm_layers); + softmax = std::make_unique<NESoftmaxLayer>(mm_layers); /* [Initialize tensors] */ @@ 
-116,7 +117,8 @@ public: // Initialize tensor of fc0 constexpr unsigned int num_labels = 128; - const TensorShape weights_shape_fc0(out_shape_pool1.x() * out_shape_pool1.y() * out_shape_pool1.z(), num_labels); + const TensorShape weights_shape_fc0(out_shape_pool1.x() * out_shape_pool1.y() * out_shape_pool1.z(), + num_labels); const TensorShape biases_shape_fc0(num_labels); const TensorShape out_shape_fc0(num_labels); @@ -138,22 +140,28 @@ public: /* [Configure functions] */ // in:32x32x1: 5x5 convolution, 8 output features maps (OFM) - conv0->configure(&src, &weights0, &biases0, &out_conv0, PadStrideInfo(1 /* stride_x */, 1 /* stride_y */, 2 /* pad_x */, 2 /* pad_y */)); + conv0->configure(&src, &weights0, &biases0, &out_conv0, + PadStrideInfo(1 /* stride_x */, 1 /* stride_y */, 2 /* pad_x */, 2 /* pad_y */)); // in:32x32x8, out:32x32x8, Activation function: relu act0.configure(&out_conv0, &out_act0, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); // in:32x32x8, out:16x16x8 (2x2 pooling), Pool type function: Max - pool0.configure(&out_act0, &out_pool0, PoolingLayerInfo(PoolingType::MAX, 2, data_layout, PadStrideInfo(2 /* stride_x */, 2 /* stride_y */))); + pool0.configure( + &out_act0, &out_pool0, + PoolingLayerInfo(PoolingType::MAX, 2, data_layout, PadStrideInfo(2 /* stride_x */, 2 /* stride_y */))); // in:16x16x8: 3x3 convolution, 16 output features maps (OFM) - conv1->configure(&out_pool0, &weights1, &biases1, &out_conv1, PadStrideInfo(1 /* stride_x */, 1 /* stride_y */, 1 /* pad_x */, 1 /* pad_y */)); + conv1->configure(&out_pool0, &weights1, &biases1, &out_conv1, + PadStrideInfo(1 /* stride_x */, 1 /* stride_y */, 1 /* pad_x */, 1 /* pad_y */)); // in:16x16x16, out:16x16x16, Activation function: relu act1.configure(&out_conv1, &out_act1, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); // in:16x16x16, out:8x8x16 (2x2 pooling), Pool type function: Average - pool1.configure(&out_act1, &out_pool1, PoolingLayerInfo(PoolingType::AVG, 2, data_layout, PadStrideInfo(2 /* stride_x */, 2 /* stride_y */))); + pool1.configure( + &out_act1, &out_pool1, + PoolingLayerInfo(PoolingType::AVG, 2, data_layout, PadStrideInfo(2 /* stride_x */, 2 /* stride_y */))); // in:8x8x16, out:128 fc0->configure(&out_pool1, &weights2, &biases2, &out_fc0); @@ -170,8 +178,8 @@ public: // We need 2 memory groups for handling the input and output // We call explicitly allocate after manage() in order to avoid overlapping lifetimes - memory_group0 = arm_compute::support::cpp14::make_unique<MemoryGroup>(mm_transitions); - memory_group1 = arm_compute::support::cpp14::make_unique<MemoryGroup>(mm_transitions); + memory_group0 = std::make_unique<MemoryGroup>(mm_transitions); + memory_group1 = std::make_unique<MemoryGroup>(mm_transitions); memory_group0->manage(&out_conv0); out_conv0.allocator()->allocate(); @@ -257,7 +265,7 @@ private: Tensor out_fc0{}; Tensor out_softmax{}; - // NEON allocator + // Allocator Allocator allocator{}; // Memory groups diff --git a/examples/neon_convolution.cpp b/examples/neon_convolution.cpp deleted file mode 100644 index 56b4ddc0be..0000000000 --- a/examples/neon_convolution.cpp +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/NEFunctions.h" - -#include "arm_compute/core/Types.h" -#include "utils/ImageLoader.h" -#include "utils/Utils.h" - -using namespace arm_compute; -using namespace utils; - -/** Gaussian 3x3 matrix - */ -const std::array<int16_t, 9> gaussian3x3 = -{ - 1, 2, 1, - 2, 4, 2, - 1, 2, 1 -}; - -/** Gaussian 5x5 matrix - */ -const std::array<int16_t, 25> gaussian5x5 = -{ - 1, 4, 6, 4, 1, - 4, 16, 24, 16, 4, - 6, 24, 36, 24, 6, - 4, 16, 24, 16, 4, - 1, 4, 6, 4, 1 -}; - -class NEONConvolutionExample : public Example -{ -public: - bool do_setup(int argc, char **argv) override - { - /** [Accurate padding] **/ - PPMLoader ppm; - - if(argc < 2) - { - // Print help - std::cout << "Usage: ./build/neon_convolution [input_image.ppm]\n\n"; - std::cout << "No input_image provided, creating a dummy 640x480 image\n"; - // Initialize just the dimensions and format of your buffers: - src.allocator()->init(TensorInfo(640, 480, Format::U8)); - } - else - { - ppm.open(argv[1]); - // Initialize just the dimensions and format of your buffers: - ppm.init_image(src, Format::U8); - } - - // Initialize just the dimensions and format of the temporary and destination images: - tmp.allocator()->init(*src.info()); - dst.allocator()->init(*src.info()); - - // Apply a Gaussian 3x3 filter to the source image followed by a Gaussian 5x5: - // The function will automatically update the padding information inside input and output to match its requirements - conv3x3.configure(&src, &tmp, gaussian3x3.data(), 0 /* Let arm_compute calculate the scale */, BorderMode::UNDEFINED); - conv5x5.configure(&tmp, &dst, gaussian5x5.data(), 0 /* Let arm_compute calculate the scale */, BorderMode::UNDEFINED); - - // Now that the padding requirements are known we can allocate the images: - src.allocator()->allocate(); - tmp.allocator()->allocate(); - dst.allocator()->allocate(); - - // Fill the input image with the content of the PPM image if a filename was provided: - if(ppm.is_open()) - { - ppm.fill_image(src); - output_filename = std::string(argv[1]) + "_out.ppm"; - } - /** [Accurate padding] **/ - - return true; - } - void do_run() override - { - //Execute the functions: - conv3x3.run(); - conv5x5.run(); - } - void do_teardown() override - { - // Save the result to file: - if(!output_filename.empty()) - { - save_to_ppm(dst, output_filename); // save_to_ppm maps and unmaps the image to store as PPM - } 
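The setup comments in the example deleted here still describe the workflow every surviving example follows: initialise tensors with dimensions and format only, let each configure() call record its padding requirements in the TensorInfo, and allocate backing memory last. A minimal sketch of that ordering against the current NEScale API (the ScaleKernelInfo form used in the neon_scale.cpp hunk further down; the concrete sizes here are illustrative, not from any example):

    Image src{}, dst{};
    src.allocator()->init(TensorInfo(640, 480, Format::U8)); // shape/format only, no memory yet
    dst.allocator()->init(TensorInfo(320, 240, Format::U8)); // half-size output for the scale below

    NEScale scale{};
    // configure() may extend the padding recorded in the tensors' TensorInfo...
    scale.configure(&src, &dst,
                    ScaleKernelInfo{InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::UNDEFINED, PixelValue(),
                                    SamplingPolicy::CENTER, false});

    // ...so backing memory is only allocated once every consumer has been configured:
    src.allocator()->allocate();
    dst.allocator()->allocate();

Allocating first would fix the padding prematurely, which is why these examples defer allocate() to the end of do_setup().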
- } - -private: - Image src{}, tmp{}, dst{}; - NEConvolution3x3 conv3x3{}; - NEConvolution5x5 conv5x5{}; - std::string output_filename{}; -}; - -/** Main program for convolution test - * - * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Path to PPM image to process ) - */ -int main(int argc, char **argv) -{ - return utils::run_example<NEONConvolutionExample>(argc, argv); -} diff --git a/examples/neon_copy_objects.cpp b/examples/neon_copy_objects.cpp index 84a2abd379..6e9ebcaad5 100644 --- a/examples/neon_copy_objects.cpp +++ b/examples/neon_copy_objects.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 ARM Limited. + * Copyright (c) 2016-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,9 +22,9 @@ * SOFTWARE. */ +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/NEFunctions.h" -#include "arm_compute/core/Types.h" #include "utils/Utils.h" #include <cstring> @@ -49,12 +49,12 @@ public: src_data = new float[width * height * batch]; dst_data = new float[width * height * batch]; - // Fill src_data with dummy values: - for(unsigned int b = 0; b < batch; b++) + // Fill src_data with pseudo(meaningless) values: + for (unsigned int b = 0; b < batch; b++) { - for(unsigned int h = 0; h < height; h++) + for (unsigned int h = 0; h < height; h++) { - for(unsigned int w = 0; w < width; w++) + for (unsigned int w = 0; w < width; w++) { src_data[b * (width * height) + h * width + w] = static_cast<float>(100 * b + 10 * h + w); } @@ -78,9 +78,12 @@ public: Window input_window; input_window.use_tensor_dimensions(input.info()->tensor_shape()); std::cout << " Dimensions of the input's iterator:\n"; - std::cout << " X = [start=" << input_window.x().start() << ", end=" << input_window.x().end() << ", step=" << input_window.x().step() << "]\n"; - std::cout << " Y = [start=" << input_window.y().start() << ", end=" << input_window.y().end() << ", step=" << input_window.y().step() << "]\n"; - std::cout << " Z = [start=" << input_window.z().start() << ", end=" << input_window.z().end() << ", step=" << input_window.z().step() << "]\n"; + std::cout << " X = [start=" << input_window.x().start() << ", end=" << input_window.x().end() + << ", step=" << input_window.x().step() << "]\n"; + std::cout << " Y = [start=" << input_window.y().start() << ", end=" << input_window.y().end() + << ", step=" << input_window.y().step() << "]\n"; + std::cout << " Z = [start=" << input_window.z().start() << ", end=" << input_window.z().end() + << ", step=" << input_window.z().step() << "]\n"; // Create an iterator: Iterator input_it(&input, input_window); @@ -98,20 +101,28 @@ public: // } // } // Except it works for an arbitrary number of dimensions - execute_window_loop(input_window, [&](const Coordinates & id) - { - std::cout << "Setting item [" << id.x() << "," << id.y() << "," << id.z() << "]\n"; - *reinterpret_cast<float *>(input_it.ptr()) = src_data[id.z() * (width * height) + id.y() * width + id.x()]; - }, - input_it); + execute_window_loop( + input_window, + [&](const Coordinates &id) + { + std::cout << "Setting item [" << id.x() << "," << id.y() << "," << id.z() << "]\n"; + *reinterpret_cast<float *>(input_it.ptr()) = + src_data[id.z() * (width * height) + id.y() * width + id.x()]; + }, + input_it); // More efficient way: create an iterator to iterate through each row (instead of each element) of the output tensor: Window output_window; - output_window.use_tensor_dimensions(output.info()->tensor_shape(), /* first_dimension =*/Window::DimY); // Iterate through the rows 
(not each element) + output_window.use_tensor_dimensions( + output.info()->tensor_shape(), + /* first_dimension =*/Window::DimY); // Iterate through the rows (not each element) std::cout << " Dimensions of the output's iterator:\n"; - std::cout << " X = [start=" << output_window.x().start() << ", end=" << output_window.x().end() << ", step=" << output_window.x().step() << "]\n"; - std::cout << " Y = [start=" << output_window.y().start() << ", end=" << output_window.y().end() << ", step=" << output_window.y().step() << "]\n"; - std::cout << " Z = [start=" << output_window.z().start() << ", end=" << output_window.z().end() << ", step=" << output_window.z().step() << "]\n"; + std::cout << " X = [start=" << output_window.x().start() << ", end=" << output_window.x().end() + << ", step=" << output_window.x().step() << "]\n"; + std::cout << " Y = [start=" << output_window.y().start() << ", end=" << output_window.y().end() + << ", step=" << output_window.y().step() << "]\n"; + std::cout << " Z = [start=" << output_window.z().start() << ", end=" << output_window.z().end() + << ", step=" << output_window.z().step() << "]\n"; // Create an iterator: Iterator output_it(&output, output_window); @@ -126,13 +137,15 @@ public: // } // } // Except it works for an arbitrary number of dimensions - execute_window_loop(output_window, [&](const Coordinates & id) - { - std::cout << "Copying one row starting from [" << id.x() << "," << id.y() << "," << id.z() << "]\n"; - // Copy one whole row: - memcpy(dst_data + id.z() * (width * height) + id.y() * width, output_it.ptr(), width * sizeof(float)); - }, - output_it); + execute_window_loop( + output_window, + [&](const Coordinates &id) + { + std::cout << "Copying one row starting from [" << id.x() << "," << id.y() << "," << id.z() << "]\n"; + // Copy one whole row: + memcpy(dst_data + id.z() * (width * height) + id.y() * width, output_it.ptr(), width * sizeof(float)); + }, + output_it); /** [Copy objects example] */ @@ -140,7 +153,7 @@ public: } void do_run() override { - // Run NEON softmax: + // Run softmax: softmax.run(); } void do_teardown() override diff --git a/examples/neon_gemm_qasymm8.cpp b/examples/neon_gemm_qasymm8.cpp index f028e004c2..3aaad02f8a 100644 --- a/examples/neon_gemm_qasymm8.cpp +++ b/examples/neon_gemm_qasymm8.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 ARM Limited. + * Copyright (c) 2020-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,12 +22,13 @@ * SOFTWARE. 
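The neon_copy_objects.cpp hunks above are formatting churn around execute_window_loop, but they show the Window/Iterator API at its tersest: a Window spans the tensor's dimensions, an Iterator binds a tensor to that window, and the lambda runs once per element with its Coordinates. A self-contained sketch of the same pattern (t is assumed to be an already-allocated F32 Tensor; the fill value is arbitrary):

    Window win;
    win.use_tensor_dimensions(t.info()->tensor_shape()); // one window dimension per tensor dimension
    Iterator it(&t, win);
    execute_window_loop(
        win,
        [&](const Coordinates &id)
        {
            // id carries the current element's coordinates; it.ptr() points at its storage
            *reinterpret_cast<float *>(it.ptr()) = static_cast<float>(id.x() + id.y());
        },
        it);

Passing Window::DimY as the first_dimension argument of use_tensor_dimensions(), as the output loop above does, collapses the X dimension so the lambda is invoked once per row instead of once per element.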
*/ #include "arm_compute/core/Types.h" -#include "arm_compute/core/WindowIterator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "arm_compute/core/WindowIterator.h" #include "arm_compute/runtime/NEON/NEFunctions.h" #include "arm_compute/runtime/NEON/NEScheduler.h" -#include "utils/Utils.h" + #include "support/ToolchainSupport.h" +#include "utils/Utils.h" #include <cstdlib> @@ -38,7 +39,7 @@ using namespace utils; void find_min_max(int size, const float *data, float *min, float *max) { *min = *max = data[0]; - for(int i = 0; i < size; i++) + for (int i = 0; i < size; i++) { const float val = data[i]; *min = std::min(*min, val); @@ -66,11 +67,11 @@ QuantizationInfo choose_quantization_params(float min, float max) // But we need to nudge the zero_point to an integer (exact quantized value) std::uint8_t zero_point_nudged = 0; - if(zero_point_real < qmin) + if (zero_point_real < qmin) { zero_point_nudged = qmin; } - else if(zero_point_real > qmax) + else if (zero_point_real > qmax) { zero_point_nudged = qmax; } @@ -85,7 +86,7 @@ QuantizationInfo choose_quantization_params(float min, float max) void quantize_values(int size, qasymm8_t *output, float *input, const QuantizationInfo qinfo) { - for(int i = 0; i < size; i++) + for (int i = 0; i < size; i++) { output[i] = quantize_qasymm8(input[i], qinfo); } @@ -102,13 +103,13 @@ int main(int argc, char **argv) Tensor q_dst0; Tensor q_res; Tensor q_res_output; - size_t M = 4; - size_t N = 4; - size_t K = 4; - bool default_input = true; + size_t M = 4; + size_t N = 4; + size_t K = 4; + bool default_input = true; // Parse args - if(argc < 3) /* case default matrix sizes */ + if (argc < 3) /* case default matrix sizes */ { // Print help std::cout << "Usage: ./build/neon_gemm_qasymm8 M N K\n"; @@ -144,20 +145,23 @@ int main(int argc, char **argv) // Fill in: one is the identity matrix, other is sequential values // src1: Identity matrix - for(size_t i = 0; i < M * K; i++) { + for (size_t i = 0; i < M * K; i++) + { src1_ptr[i] = 0; } - for(size_t i = 0; i < M; i++) { + for (size_t i = 0; i < M; i++) + { src1_ptr[i * K + i] = 1.0f; } // src2: Sequential values matrix - for(size_t i = 0; i < K * N; i++) { + for (size_t i = 0; i < K * N; i++) + { src2_ptr[i] = i * 1.123f; } // Otherwise if M, N, K is given, fill in with random values - if(!default_input) + if (!default_input) { fill_random_tensor(src1, 0.f, 1.f); fill_random_tensor(src2, 0.f, 1.f); @@ -217,13 +221,22 @@ int main(int argc, char **argv) qgemm.configure(&q_src1, &q_src2, nullptr, &q_res); // Configure output stage after computing shift and multiplier parameters - NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint gemmlowp_output_stage; - int output_multiplier; - int output_shift; + NEGEMMLowpOutputStage gemmlowp_output_stage; + int output_multiplier; + int output_shift; float multiplier = (src1_qinfo.uniform().scale * src2_qinfo.uniform().scale) / dst0_qinfo.uniform().scale; quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); std::cout << "(q_multiplier, q_shift) = (" << output_multiplier << ", " << output_shift << ")\n\n"; - gemmlowp_output_stage.configure(&q_res, nullptr, &q_res_output, output_multiplier, output_shift, dst0_qinfo.uniform().offset); + + GEMMLowpOutputStageInfo info; + info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT; + info.gemmlowp_multiplier = output_multiplier; + info.gemmlowp_shift = output_shift; + info.gemmlowp_offset = dst0_qinfo.uniform().offset; + info.output_data_type = 
DataType::QASYMM8; + q_res_output.info()->set_data_type(DataType::QASYMM8); + q_res_output.info()->set_num_channels(1); + gemmlowp_output_stage.configure(&q_res, nullptr, &q_res_output, info); // Allocate all tensors q_src1.allocator()->allocate(); @@ -240,7 +253,7 @@ int main(int argc, char **argv) qgemm.run(); // Run output stage kernel gemmlowp_output_stage.run(); - std::cout << "Done\n"; + std::cout << "\nTest Passed\n"; #if ARM_COMPUTE_DEBUG_ENABLED // Print quantized source matrices diff --git a/examples/neon_gemm_s8_f32.cpp b/examples/neon_gemm_s8_f32.cpp new file mode 100644 index 0000000000..7c1497ec41 --- /dev/null +++ b/examples/neon_gemm_s8_f32.cpp @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2020-2021, 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
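The neon_gemm_qasymm8.cpp hunk above is a real API migration, not just reformatting: the dedicated NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint function is replaced by the generic NEGEMMLowpOutputStage, with the requantisation parameters gathered into a GEMMLowpOutputStageInfo descriptor instead of being passed positionally to configure(). Condensed from the diff:

    GEMMLowpOutputStageInfo info;
    info.type                = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
    info.gemmlowp_multiplier = output_multiplier; // from calculate_quantized_multiplier_less_than_one()
    info.gemmlowp_shift      = output_shift;
    info.gemmlowp_offset     = dst0_qinfo.uniform().offset;
    info.output_data_type    = DataType::QASYMM8;

    NEGEMMLowpOutputStage output_stage;
    output_stage.configure(&q_res, nullptr, &q_res_output, info); // int32 accumulators -> QASYMM8

One descriptor now covers every output-stage flavour, which is why the output data type must be stated explicitly both in the info struct and on q_res_output's TensorInfo.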
+ */ +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "arm_compute/core/WindowIterator.h" +#include "arm_compute/runtime/NEON/NEFunctions.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" + +#include "support/ToolchainSupport.h" +#include "utils/Utils.h" + +#include <cstdlib> + +using namespace arm_compute; +using namespace utils; + +QuantizationInfo dynamic_qinfo(QuantizationInfo qinfo) +{ + return QuantizationInfo(qinfo.scale(), qinfo.offset(), true); +} +void set_qinfo_dynamic(Tensor &t) +{ + t.info()->set_quantization_info(dynamic_qinfo(t.info()->quantization_info())); +} + +void quantize(Tensor &qt, const Tensor &t, float min, float max) +{ + DataType dt = DataType::QASYMM8_SIGNED; + + // Determine the scale + const float scale = (max - min) / 256.0f; + + // Determine the zero-point; using affine equation val = (qval-zerop) * scale + const float zero_point = -128.0f - min / scale; + + QuantizationInfo qinfo(scale, (int32_t)round(zero_point), true); + + // We now have the quantisation info and can configure the quantised tensor + qt.allocator()->init(TensorInfo(t.info()->tensor_shape(), 1, dt, qinfo)); + qt.allocator()->allocate(); + NEQuantizationLayer quantization; + quantization.configure(&t, &qt); + quantization.run(); +} + +void invert_qinfo_offset(Tensor &t) +{ + QuantizationInfo qinfo = t.info()->quantization_info(); + t.info()->set_quantization_info(QuantizationInfo(qinfo.scale()[0], -qinfo.offset()[0], qinfo.is_dynamic())); +} + +void print_quantization_info(const Tensor &t, const std::string &name_prefix) +{ + QuantizationInfo qinfo = t.info()->quantization_info(); + std::cout << name_prefix << "_qinfo=" + << "QuantizationInfo(" << qinfo.scale()[0] << ", " << qinfo.offset()[0] << ")\n"; +} + +int main(int argc, char **argv) +{ + size_t M = 4; + size_t N = 4; + size_t K = 4; + + // Parse args + if (argc < 3) /* case default matrix sizes */ + { + // Print help + std::cout << "Usage: ./build/neon_gemm_s8_f32 M N K\n"; + std::cout << "Too few or no inputs provided.
Using default M=4, N=4, K=4\n\n"; + } + else /* case M N K arguments provided */ + { + M = strtol(argv[1], nullptr, 10); + N = strtol(argv[2], nullptr, 10); + K = strtol(argv[3], nullptr, 10); + } + + /*** Floating point matrix multiplication ***/ + + // Initialise input matrices + NEGEMM fgemm{}; + + Tensor src1; + Tensor src2; + Tensor dst; + src1.allocator()->init(TensorInfo(TensorShape(K, M), 1, DataType::F32)); + src2.allocator()->init(TensorInfo(TensorShape(N, K), 1, DataType::F32)); + dst.allocator()->init(TensorInfo(TensorShape(N, M), 1, DataType::F32)); + fgemm.configure(&src1, &src2, nullptr, &dst, 1, 0); + + // Allocate matrices + src1.allocator()->allocate(); + src2.allocator()->allocate(); + dst.allocator()->allocate(); + + float min1 = 0.0f; + float max1 = 1.0f; + fill_random_tensor(src1, 0, min1, max1); + + float min2 = -1.0f; + float max2 = 2.0f; + fill_random_tensor(src2, 1, min2, max2); + + // Run single precision gemm and print result + fgemm.run(); + +#if ARM_COMPUTE_DEBUG_ENABLED + std::cout << "# F32 GEMM result:\n"; + std::cout << "src1=[ \n"; + src1.print(std::cout); + std::cout << "] \n"; + std::cout << "src2=[ \n"; + src2.print(std::cout); + std::cout << "] \n"; + std::cout << "dst=[ \n"; + dst.print(std::cout); + std::cout << "] \n"; +#endif // ARM_COMPUTE_DEBUG_ENABLED + + Tensor q_src1; + quantize(q_src1, src1, min1, max1); + print_quantization_info(q_src1, "src1"); + q_src1.info()->set_are_values_constant(false); + + // NEGEMMLowpMatrixMultiplyCore adopts the opposite convention for the offset + // compared to NEQuantizeLayer + invert_qinfo_offset(q_src1); + + Tensor q_src2; + quantize(q_src2, src2, min2, max2); + print_quantization_info(q_src2, "src2"); + q_src2.info()->set_are_values_constant(false); + + // NEGEMMLowpMatrixMultiplyCore adopts the opposite convention for the offset + // compared to NEQuantizeLayer + invert_qinfo_offset(q_src2); + + // q_dst will be Dequantized to F32 so it doesn't need a QuantizationInfo + Tensor q_dst; + q_dst.allocator()->init(TensorInfo(TensorShape(N, M), 1, DataType::F32)); + + // Configure low precision gemm and initialise result tensor (pre-output) + NEGEMMLowpMatrixMultiplyCore qgemm; + qgemm.configure(&q_src1, &q_src2, nullptr, &q_dst); + + q_dst.allocator()->allocate(); + + // Run low precision matrix multiply kernel + qgemm.run(); + +#if ARM_COMPUTE_DEBUG_ENABLED + // Print quantized source matrices + std::cout << "q_src1=[ \n"; + q_src1.print(std::cout); + std::cout << "] \n"; + std::cout << "q_src2=[ \n"; + q_src2.print(std::cout); + std::cout << "] \n"; + std::cout << "# Lowp GEMM output (FP32):\n"; + std::cout << "q_dst=[ \n"; + q_dst.print(std::cout); + std::cout << "] \n"; + + // Expected result + std::cout << "# Expected result:\n"; + std::cout << "dst=[ \n"; + dst.print(std::cout); + std::cout << "] \n"; +#endif // ARM_COMPUTE_DEBUG_ENABLED + + // Rerun to test the ability to modify the Tensor contents and QuantizationInfo (dynamic quantization) + min1 = -1.0f; + max1 = 1.0f; + fill_random_tensor(src1, 2, min1, max1); + +#if ARM_COMPUTE_DEBUG_ENABLED + std::cout << "# Refilled src1\n"; + std::cout << "src1=[ \n"; + src1.print(std::cout); + std::cout << "] \n"; + std::cout << "src2=[ \n"; + src2.print(std::cout); + std::cout << "] \n"; +#endif // ARM_COMPUTE_DEBUG_ENABLED + + fgemm.run(); + + quantize(q_src1, src1, min1, max1); + set_qinfo_dynamic(q_src1); + print_quantization_info(q_src1, "src1"); + + // NEGEMMLowpMatrixMultiplyCore adopts the opposite convention for the offset + // compared to 
NEQuantizeLayer + invert_qinfo_offset(q_src1); + + qgemm.run(); + +#if ARM_COMPUTE_DEBUG_ENABLED + // Print quantized source matrices + std::cout << "q_src1=[ \n"; + q_src1.print(std::cout); + std::cout << "] \n"; + std::cout << "q_src2=[ \n"; + q_src2.print(std::cout); + std::cout << "] \n"; + std::cout << "# Lowp GEMM output (FP32):\n"; + std::cout << "q_dst=[ \n"; + q_dst.print(std::cout); + std::cout << "] \n"; + + // Expected result + std::cout << "# Expected result:\n"; + std::cout << "dst=[ \n"; + dst.print(std::cout); + std::cout << "] \n"; +#endif // ARM_COMPUTE_DEBUG_ENABLED +} diff --git a/examples/neon_opticalflow.cpp b/examples/neon_opticalflow.cpp deleted file mode 100644 index b5df819e97..0000000000 --- a/examples/neon_opticalflow.cpp +++ /dev/null @@ -1,263 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
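The quantize() helper in the new neon_gemm_s8_f32.cpp above is worth a worked number. For the first input range it sees (min1 = 0.0f, max1 = 1.0f, from the fill_random_tensor call), the affine parameters come out as:

    const float scale      = (1.0f - 0.0f) / 256.0f; // 0.00390625
    const float zero_point = -128.0f - 0.0f / scale; // -128
    // val = (qval - zero_point) * scale, so 0.0f maps to qval -128 and
    // 1.0f maps to qval 128, which saturates to 127 in QASYMM8_SIGNED:
    // dividing the range by 256 rather than 255 costs one step at the very top.

The third constructor argument of the QuantizationInfo built there marks the info as dynamic, which is what lets the example refill src1, requantise it, and rerun the already-configured NEGEMMLowpMatrixMultiplyCore without reconfiguring.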
- */ -#include "arm_compute/runtime/NEON/NEFunctions.h" - -#include "arm_compute/core/Types.h" -#include "utils/ImageLoader.h" -#include "utils/Utils.h" - -#include <fstream> -#include <sstream> -#include <vector> - -using namespace arm_compute; -using namespace utils; - -class NeonOpticalFlowExample : public Example -{ -public: - NeonOpticalFlowExample() - : input_points(100), output_points(100), point_estimates(100) - { - } - - bool do_setup(int argc, char **argv) override - { - if(argc < 5) - { - // Print help - std::cout << "Usage: ./build/neon_opticalflow [src_1st.ppm] [src_2nd.ppm] [keypoints] [estimates]\n\n"; - const unsigned int img_width = 64; - const unsigned int img_height = 64; - const unsigned int rect_x = 20; - const unsigned int rect_y = 40; - const unsigned int rect_s = 8; - const unsigned int offsetx = 24; - const unsigned int offsety = 3; - std::cout << "No input_image provided, creating test data:\n"; - std::cout << "\t Image src_1st = (" << img_width << "," << img_height << ")" << std::endl; - std::cout << "\t Image src_2nd = (" << img_width << "," << img_height << ")" << std::endl; - init_img(src_1st, img_width, img_height, rect_x, rect_y, rect_s); - init_img(src_2nd, img_width, img_height, rect_x + offsetx, rect_y + offsety, rect_s); - const int num_points = 4; - input_points.resize(num_points); - point_estimates.resize(num_points); - const std::array<unsigned int, num_points> tracking_coordsx = { rect_x - 1, rect_x, rect_x + 1, rect_x + 2 }; - const std::array<unsigned int, num_points> tracking_coordsy = { rect_y - 1, rect_y, rect_y + 1, rect_y + 2 }; - const std::array<unsigned int, num_points> estimate_coordsx = { rect_x + offsetx - 1, rect_x + offsetx, rect_x + offsetx + 1, rect_x + offsetx + 2 }; - const std::array<unsigned int, num_points> estimate_coordsy = { rect_y + offsety - 1, rect_y + offsety, rect_y + offsety + 1, rect_y + offsety + 2 }; - - for(int k = 0; k < num_points; ++k) - { - auto &keypoint = input_points.at(k); - keypoint.x = tracking_coordsx[k]; - keypoint.y = tracking_coordsy[k]; - keypoint.tracking_status = 1; - } - for(int k = 0; k < num_points; ++k) - { - auto &keypoint = point_estimates.at(k); - keypoint.x = estimate_coordsx[k]; - keypoint.y = estimate_coordsy[k]; - keypoint.tracking_status = 1; - } - } - else - { - load_ppm(argv[1], src_1st); - load_ppm(argv[2], src_2nd); - load_keypoints(argv[3], input_points); - load_keypoints(argv[4], point_estimates); - } - - print_points(input_points, "Tracking points : "); - print_points(point_estimates, "Estimates points : "); - - const unsigned int num_levels = 3; - // Initialise and allocate pyramids - PyramidInfo pyramid_info(num_levels, SCALE_PYRAMID_HALF, src_1st.info()->tensor_shape(), src_1st.info()->format()); - pyr_1st.init_auto_padding(pyramid_info); - pyr_2nd.init_auto_padding(pyramid_info); - - pyrf_1st.configure(&src_1st, &pyr_1st, BorderMode::UNDEFINED, 0); - pyrf_2nd.configure(&src_2nd, &pyr_2nd, BorderMode::UNDEFINED, 0); - - output_points.resize(input_points.num_values()); - - optkf.configure(&pyr_1st, &pyr_2nd, - &input_points, &point_estimates, &output_points, - Termination::TERM_CRITERIA_BOTH, 0.01f, 15, 5, true, BorderMode::UNDEFINED, 0); - - pyr_1st.allocate(); - pyr_2nd.allocate(); - - return true; - } - void do_run() override - { - //Execute the functions: - pyrf_1st.run(); - pyrf_2nd.run(); - optkf.run(); - } - void do_teardown() override - { - print_points(output_points, "Output points : "); - } - -private: - /** Loads the input keypoints from a file into an array - * - * 
@param[in] fn Filename containing the keypoints. Each line must have two values X Y. - * @param[out] img Reference to an unintialised KeyPointArray - */ - bool load_keypoints(const std::string &fn, KeyPointArray &array) - { - assert(!fn.empty()); - std::ifstream f(fn); - if(f.is_open()) - { - std::cout << "Reading points from " << fn << std::endl; - std::vector<KeyPoint> v; - for(std::string line; std::getline(f, line);) - { - std::stringstream ss(line); - std::string xcoord; - std::string ycoord; - getline(ss, xcoord, ' '); - getline(ss, ycoord, ' '); - KeyPoint kp; - kp.x = std::stoi(xcoord); - kp.y = std::stoi(ycoord); - kp.tracking_status = 1; - v.push_back(kp); - } - const int num_points = v.size(); - array.resize(num_points); - for(int k = 0; k < num_points; ++k) - { - auto &keypoint = array.at(k); - keypoint = v[k]; - } - return true; - } - else - { - std::cout << "Cannot open keypoints file " << fn << std::endl; - return false; - } - } - - /** Creates and Image and fills it with the ppm data from the file - * - * @param[in] fn PPM filename to be loaded - * @param[out] img Reference to an unintialised image instance - */ - bool load_ppm(const std::string &fn, Image &img) - { - assert(!fn.empty()); - PPMLoader ppm; - ppm.open(fn); - ppm.init_image(img, Format::U8); - img.allocator()->allocate(); - if(ppm.is_open()) - { - std::cout << "Reading image " << fn << std::endl; - ppm.fill_image(img); - return true; - } - else - { - std::cout << "Cannot open " << fn << std::endl; - return false; - } - } - /** Creates and Image and draws a square in the specified coordinares. - * - * @param[out] img Reference to an unintialised image instance - * @param[in] img_width Width of the image to be created - * @param[in] img_height Height of the image to be created - * @param[in] square_center_x Coordinate along x-axis to be used as the center for the square - * @param[in] square_center_y Coordinate along y-axis to be used as the center for the square - * @param[in] square_size Size in pixels to be used for the square - */ - void init_img(Image &img, unsigned int img_width, unsigned int img_height, - unsigned int square_center_x, unsigned int square_center_y, - unsigned int square_size) - { - img.allocator()->init(TensorInfo(img_width, img_height, Format::U8)); - img.allocator()->allocate(); - const unsigned int square_half = square_size / 2; - // assert the square is in the bounds of the image - assert(square_center_x > square_half && square_center_x + square_half < img_width); - assert(square_center_y > square_half && square_center_y + square_half < img_height); - // get ptr to the top left pixel for the squeare - std::fill(img.buffer(), img.buffer() + img_width * img_height, 0); - for(unsigned int i = 0; i < square_size; ++i) - { - for(unsigned int j = 0; j < square_size; ++j) - { - uint8_t *ptr = img.ptr_to_element(Coordinates(square_center_x - square_half + j, square_center_y - square_half + i)); - *ptr = 0xFF; - } - } - } - /** Prints an array of keypoints and an optional label - * - * @param[in] a Keypoint array to be printed - * @param[in] str Label to be printed before the array - */ - void print_points(const KeyPointArray &a, const std::string &str = "") - { - std::cout << str << std::endl; - for(unsigned int k = 0; k < a.num_values(); ++k) - { - auto kp = a.at(k); - std::cout << "\t " - << " (x,y) = (" << kp.x << "," << kp.y << ")"; - std::cout << " strength = " << kp.strength << " " - << " scale = " << kp.scale << " orientation " << kp.orientation << " status " << kp.tracking_status << " 
err = " << kp.error << std::endl; - } - } - - Pyramid pyr_1st{}; - Pyramid pyr_2nd{}; - NEGaussianPyramidHalf pyrf_1st{}; - NEGaussianPyramidHalf pyrf_2nd{}; - NEOpticalFlow optkf{}; - Image src_1st{}, src_2nd{}; - KeyPointArray input_points; - KeyPointArray output_points; - KeyPointArray point_estimates; -}; - -/** Main program for optical flow test - * - * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Path to PPM image to process ) - */ -int main(int argc, char **argv) -{ - return utils::run_example<NeonOpticalFlowExample>(argc, argv); -} diff --git a/examples/neon_permute.cpp b/examples/neon_permute.cpp index 05c8169020..76ba079430 100644 --- a/examples/neon_permute.cpp +++ b/examples/neon_permute.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,9 +21,9 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/NEFunctions.h" -#include "arm_compute/core/Types.h" #include "utils/Utils.h" using namespace arm_compute; @@ -85,11 +85,13 @@ private: window.use_tensor_dimensions(reference.info()->tensor_shape()); Iterator ref_it(&reference, window); Iterator res_it(&result, window); - execute_window_loop(window, [&](const Coordinates &) - { - assert(*reinterpret_cast<unsigned char *>(ref_it.ptr()) == *reinterpret_cast<unsigned char *>(res_it.ptr())); - }, - ref_it, res_it); + execute_window_loop( + window, + [&](const Coordinates &) { + assert(*reinterpret_cast<unsigned char *>(ref_it.ptr()) == + *reinterpret_cast<unsigned char *>(res_it.ptr())); + }, + ref_it, res_it); } void fill_tensor(Tensor &tensor) @@ -98,11 +100,9 @@ private: window.use_tensor_dimensions(tensor.info()->tensor_shape()); Iterator tensor_it(&tensor, window); unsigned char val(0); - execute_window_loop(window, [&](const Coordinates &) - { - *reinterpret_cast<unsigned char *>(tensor_it.ptr()) = val++; - }, - tensor_it); + execute_window_loop( + window, [&](const Coordinates &) { *reinterpret_cast<unsigned char *>(tensor_it.ptr()) = val++; }, + tensor_it); } void init_tensor(const TensorShape shape, Tensor &tensor, DataType type, DataLayout layout) { diff --git a/examples/neon_scale.cpp b/examples/neon_scale.cpp index b04d916aaf..28590bd861 100644 --- a/examples/neon_scale.cpp +++ b/examples/neon_scale.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 ARM Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,9 +21,9 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/NEFunctions.h" -#include "arm_compute/core/Types.h" #include "utils/ImageLoader.h" #include "utils/Utils.h" @@ -37,7 +37,7 @@ public: { PPMLoader ppm; - if(argc < 2) + if (argc < 2) { // Print help std::cout << "Usage: ./build/neon_scale[input_image.ppm]\n\n"; @@ -60,14 +60,16 @@ public: dst.allocator()->init(dst_tensor_info); // Configure Scale function object: - scale.configure(&src, &dst, InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::UNDEFINED); + scale.configure(&src, &dst, + ScaleKernelInfo{InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::UNDEFINED, PixelValue(), + SamplingPolicy::CENTER, false}); // Allocate all the images src.allocator()->allocate(); dst.allocator()->allocate(); // Fill the input image with the content of the PPM image if a filename was provided: - if(ppm.is_open()) + if (ppm.is_open()) { ppm.fill_image(src); output_filename = std::string(argv[1]) + "_out.ppm"; @@ -83,7 +85,7 @@ public: void do_teardown() override { // Save the result to file: - if(!output_filename.empty()) + if (!output_filename.empty()) { save_to_ppm(dst, output_filename); // save_to_ppm maps and unmaps the image to store as PPM } diff --git a/examples/neon_sgemm.cpp b/examples/neon_sgemm.cpp index 8f395dec9a..8cda65a400 100644 --- a/examples/neon_sgemm.cpp +++ b/examples/neon_sgemm.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2019 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,6 +24,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/NEFunctions.h" #include "arm_compute/runtime/NEON/NEScheduler.h" + #include "utils/Utils.h" #include <cstdlib> @@ -43,15 +44,16 @@ public: beta = 0.0f; std::ifstream stream; - if(argc > 1) + if (argc > 1) { stream.open(argv[1], std::fstream::in); } - if(argc < 3 || (argc < 4 && stream.bad())) + if (argc < 3 || (argc < 4 && stream.bad())) { // Print help - std::cout << "Usage: 1) ./build/neon_sgemm input_matrix_1.npy input_matrix_2.npy [input_matrix_3.npy] [alpha = 1] [beta = 0]\n"; + std::cout << "Usage: 1) ./build/neon_sgemm input_matrix_1.npy input_matrix_2.npy [input_matrix_3.npy] " + "[alpha = 1] [beta = 0]\n"; std::cout << " 2) ./build/neon_sgemm M N K [alpha = 1.0f] [beta = 0.0f]\n\n"; std::cout << "Too few or no input_matrices provided. 
Using M=7, N=3, K=5, alpha=1.0f and beta=0.0f\n\n"; @@ -61,29 +63,29 @@ public: } else { - if(stream.good()) /* case file1.npy file2.npy [file3.npy] [alpha = 1.0f] [beta = 0.0f] */ + if (stream.good()) /* case file1.npy file2.npy [file3.npy] [alpha = 1.0f] [beta = 0.0f] */ { npy0.open(argv[1]); npy0.init_tensor(src0, DataType::F32); npy1.open(argv[2]); npy1.init_tensor(src1, DataType::F32); - if(argc > 3) + if (argc > 3) { stream.close(); stream.clear(); stream.open(argv[3], std::fstream::in); - if(stream.good()) /* case with third file */ + if (stream.good()) /* case with third file */ { npy2.open(argv[3]); npy2.init_tensor(src2, DataType::F32); - if(argc > 4) + if (argc > 4) { // Convert string to float alpha = strtof(argv[4], nullptr); - if(argc > 5) + if (argc > 5) { // Convert string to float beta = strtof(argv[5], nullptr); @@ -94,7 +96,7 @@ public: { alpha = strtof(argv[3], nullptr); - if(argc > 4) + if (argc > 4) { beta = strtof(argv[4], nullptr); } @@ -111,11 +113,11 @@ public: src1.allocator()->init(TensorInfo(TensorShape(N, K), 1, DataType::F32)); src2.allocator()->init(TensorInfo(TensorShape(N, M), 1, DataType::F32)); - if(argc > 4) + if (argc > 4) { alpha = strtof(argv[4], nullptr); - if(argc > 5) + if (argc > 5) { beta = strtof(argv[5], nullptr); } @@ -134,7 +136,7 @@ public: dst.allocator()->allocate(); // Fill the input images with either the data provided or random data - if(npy0.is_open()) + if (npy0.is_open()) { npy0.fill_tensor(src0); npy1.fill_tensor(src1); @@ -142,7 +144,7 @@ public: output_filename = "sgemm_out.npy"; is_fortran = npy0.is_fortran(); - if(npy2.is_open()) + if (npy2.is_open()) { src2.allocator()->allocate(); npy2.fill_tensor(src2); @@ -169,7 +171,7 @@ public: } void do_teardown() override { - if(!output_filename.empty()) /* Save to .npy file */ + if (!output_filename.empty()) /* Save to .npy file */ { save_to_npy(dst, output_filename, is_fortran); } diff --git a/examples/neoncl_scale_median_gaussian.cpp b/examples/neoncl_scale_median_gaussian.cpp deleted file mode 100644 index 1b26517d9f..0000000000 --- a/examples/neoncl_scale_median_gaussian.cpp +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CL /* Needed by Utils.cpp to handle OpenCL exceptions properly */ -#error "This example needs to be built with -DARM_COMPUTE_CL" -#endif /* ARM_COMPUTE_CL */ - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/CLFunctions.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/NEON/NEFunctions.h" -#include "utils/ImageLoader.h" -#include "utils/Utils.h" - -using namespace arm_compute; -using namespace utils; - -/** Example demonstrating how to use both CL and NEON functions in the same pipeline - * - * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Path to PPM image to process ) - */ -class NEONCLScaleMedianGaussianExample : public Example -{ -public: - bool do_setup(int argc, char **argv) override - { - /** [NEON / OpenCL Interop] */ - PPMLoader ppm; - - CLScheduler::get().default_init(); - - if(argc < 2) - { - // Print help - std::cout << "Usage: ./build/cl_convolution [input_image.ppm]\n\n"; - std::cout << "No input_image provided, creating a dummy 640x480 image\n"; - // Create an empty grayscale 640x480 image - src.allocator()->init(TensorInfo(640, 480, Format::U8)); - } - else - { - ppm.open(argv[1]); - ppm.init_image(src, Format::U8); - } - - TensorInfo scale_median_info(TensorInfo(src.info()->dimension(0) / 2, src.info()->dimension(1) / 2, Format::U8)); - - // Configure the temporary and destination images - scale_median.allocator()->init(scale_median_info); - median_gauss.allocator()->init(scale_median_info); - dst.allocator()->init(scale_median_info); - - scale.configure(&src, &scale_median, InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::REPLICATE); - median.configure(&scale_median, &median_gauss, BorderMode::REPLICATE); - gauss.configure(&median_gauss, &dst, BorderMode::REPLICATE); - - // Allocate all the images - src.allocator()->allocate(); - scale_median.allocator()->allocate(); - median_gauss.allocator()->allocate(); - dst.allocator()->allocate(); - - // Fill the input image with the content of the PPM image if a filename was provided: - if(ppm.is_open()) - { - ppm.fill_image(src); - const std::string output_filename = std::string(argv[1]) + "_out.ppm"; - } - /** [NEON / OpenCL Interop] */ - - return true; - } - void do_run() override - { - // Enqueue and flush the OpenCL kernel: - scale.run(); - - // Do a blocking map of the input and output buffers of the NEON function: - scale_median.map(); - median_gauss.map(); - - // Run the NEON function: - median.run(); - - // Unmap the output buffer before it's used again by OpenCL: - scale_median.unmap(); - median_gauss.unmap(); - - // Run the final OpenCL function: - gauss.run(); - - // Make sure all the OpenCL jobs are done executing: - CLScheduler::get().sync(); - } - void do_teardown() override - { - // Save the result to file: - if(!output_filename.empty()) - { - save_to_ppm(dst, output_filename); // save_to_ppm maps and unmaps the image to store as PPM - } - } - -private: - CLImage src{}, scale_median{}, median_gauss{}, dst{}; - CLScale scale{}; - NEMedian3x3 median{}; - CLGaussian5x5 gauss{}; - std::string output_filename{}; -}; - -/** Main program for neon/cl scale median gaussian test - * - * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Path to PPM image to process ) - */ -int main(int argc, char **argv) -{ - return utils::run_example<NEONCLScaleMedianGaussianExample>(argc, argv); -}
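The deleted neoncl_scale_median_gaussian.cpp was the last example mixing OpenCL and CPU functions in one pipeline. Its legacy CV functions (CLScale with a bare BorderMode, NEMedian3x3, CLGaussian5x5) are gone, but the interop rule it demonstrated still holds for CLTensor: map the buffer before any CPU code touches it, unmap it before OpenCL uses it again, and sync the scheduler before reading results. Schematically (cl_producer, cpu_filter and cl_consumer are placeholders, not current API names):

    cl_producer.run();         // enqueue OpenCL work writing cl_tensor
    cl_tensor.map();           // blocking map: make the buffer visible to the CPU
    cpu_filter.run();          // CPU function reads/writes the mapped buffer
    cl_tensor.unmap();         // hand the buffer back to OpenCL before...
    cl_consumer.run();         // ...the next OpenCL function consumes it
    CLScheduler::get().sync(); // wait for all queued OpenCL jobs to finish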