Diffstat (limited to 'examples')
57 files changed, 8247 insertions, 7193 deletions
diff --git a/examples/BUILD.bazel b/examples/BUILD.bazel new file mode 100644 index 0000000000..ad9abf50a2 --- /dev/null +++ b/examples/BUILD.bazel @@ -0,0 +1,540 @@ +# Copyright (c) 2023 Arm Limited. +# +# SPDX-License-Identifier: MIT +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. + +#--------------------------------------------------------------------- +# Neon examples + +cc_binary( + name = "neon_cnn", + srcs = ["neon_cnn.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "neon_copy_objects", + srcs = ["neon_copy_objects.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "neon_gemm_qasymm8", + srcs = ["neon_gemm_qasymm8.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "neon_permute", + srcs = ["neon_permute.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "neon_scale", + srcs = ["neon_scale.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "neon_sgemm", + srcs = ["neon_sgemm.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + 
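Each cc_binary above repeats the same copts pattern: a select() that maps the repository's //:arch_armv8-a and //:arch_armv8.2-a+fp16 config settings to the matching -march flag, with //conditions:default falling back to -march=armv8-a. A minimal sketch of that mapping in plain Python, for illustration only (the config_setting targets themselves are assumed to live in the repository's root BUILD.bazel, which is not part of this diff):

    # Sketch only: the arch -> copts mapping that each select() above encodes.
    ARCH_TO_COPTS = {
        "armv8-a": ["-march=armv8-a"],
        "armv8.2-a+fp16": ["-march=armv8.2-a+fp16"],
    }

    def example_copts(arch: str) -> list:
        # Unknown settings fall back to armv8-a, mirroring //conditions:default.
        return ARCH_TO_COPTS.get(arch, ["-march=armv8-a"])

In Starlark this repetition could be factored into a small macro wrapping cc_binary; the file instead keeps each target explicit.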
+#--------------------------------------------------------------------- +# Graph examples + +cc_binary( + name = "graph_alexnet", + srcs = ["graph_alexnet.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_deepspeech_v0_4_1", + srcs = ["graph_deepspeech_v0_4_1.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_edsr", + srcs = [ + "graph_edsr.cpp", + "graph_edsr.h", + ], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_googlenet", + srcs = ["graph_googlenet.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_inception_resnet_v1", + srcs = ["graph_inception_resnet_v1.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_inception_resnet_v2", + srcs = ["graph_inception_resnet_v2.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_inception_v3", + srcs = ["graph_inception_v3.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_inception_v4", + srcs = ["graph_inception_v4.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_lenet", + srcs = ["graph_lenet.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_mobilenet", + srcs = ["graph_mobilenet.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": 
["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_mobilenet_v2", + srcs = ["graph_mobilenet_v2.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_resnet12", + srcs = ["graph_resnet12.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_resnet50", + srcs = ["graph_resnet50.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_resnet_v2_50", + srcs = ["graph_resnet_v2_50.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_resnext50", + srcs = ["graph_resnext50.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_shufflenet", + srcs = ["graph_shufflenet.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_squeezenet", + srcs = ["graph_squeezenet.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_squeezenet_v1_1", + srcs = ["graph_squeezenet_v1_1.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_srcnn955", + srcs = ["graph_srcnn955.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_ssd_mobilenet", + srcs = ["graph_ssd_mobilenet.cpp"], + copts = select({ + 
"//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_vgg16", + srcs = ["graph_vgg16.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_vgg19", + srcs = ["graph_vgg19.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_vgg_vdsr", + srcs = ["graph_vgg_vdsr.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) + +cc_binary( + name = "graph_yolov3", + srcs = ["graph_yolov3.cpp"], + copts = select({ + "//:arch_armv8-a": ["-march=armv8-a"], + "//:arch_armv8.2-a+fp16": ["-march=armv8.2-a+fp16"], + "//conditions:default": ["-march=armv8-a"], + }), + linkstatic = False, + deps = [ + "//:arm_compute", + "//:arm_compute_graph", + "//include", + "//utils", + ], +) diff --git a/examples/CMakeLists.txt b/examples/CMakeLists.txt new file mode 100644 index 0000000000..6b7fbded5d --- /dev/null +++ b/examples/CMakeLists.txt @@ -0,0 +1,57 @@ +# Copyright (c) 2023-2024 Arm Limited. +# +# SPDX-License-Identifier: MIT +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
+ +set(EXAMPLE_GRAPH_NAMES + graph_alexnet + graph_deepspeech_v0_4_1 + graph_edsr + graph_googlenet + graph_inception_resnet_v1 + graph_inception_resnet_v2 + graph_inception_v3 + graph_inception_v4 + graph_lenet + graph_mobilenet_v2 + graph_mobilenet + graph_resnet_v2_50 + graph_resnet12 + graph_resnet50 + graph_resnext50 + graph_shufflenet + graph_squeezenet_v1_1 + graph_squeezenet + graph_srcnn955 + graph_ssd_mobilenet + graph_vgg_vdsr + graph_vgg16 + graph_vgg19 + graph_yolov3 + PARENT_SCOPE) + +set(EXAMPLE_NEON_NAMES + neon_cnn neon_copy_objects + neon_gemm_qasymm8 + neon_gemm_s8_f32 + neon_permute + neon_scale + neon_sgemm + PARENT_SCOPE) diff --git a/examples/SConscript b/examples/SConscript index 8971d3c6d8..8ece7e60b2 100644 --- a/examples/SConscript +++ b/examples/SConscript @@ -1,4 +1,7 @@ -# Copyright (c) 2017 ARM Limited. +#!/usr/bin/python +# -*- coding: utf-8 -*- + +# Copyright (c) 2017-2024 Arm Limited. # # SPDX-License-Identifier: MIT # @@ -32,22 +35,38 @@ examples_env.Append(CPPPATH = ["#"]) # Build examples utils = examples_env.Object("../utils/Utils.cpp") -if env['os'] in ['android', 'bare_metal'] or env['standalone']: +if env['os'] in ['android', 'macos', 'bare_metal'] or env['standalone']: Import('arm_compute_graph_a') Import('arm_compute_a') - Import('arm_compute_core_a') - arm_compute_libs = [ arm_compute_a, arm_compute_core_a ] + arm_compute_libs = [ arm_compute_a ] arm_compute_graph_libs = arm_compute_libs # The graph library needs to be linked separately with --whole-archive arm_compute_dependency = arm_compute_a graph_dependency = [arm_compute_graph_a] else: Import('arm_compute_graph_so') Import('arm_compute_so') - arm_compute_libs = ["arm_compute", "arm_compute_core"] + arm_compute_libs = ["arm_compute"] arm_compute_graph_libs = [ "arm_compute_graph" ] + arm_compute_libs arm_compute_dependency = arm_compute_so graph_dependency = [arm_compute_graph_so] +extra_link_flags = [] + +if not env['os'] in ['windows','bare_metal'] : + extra_link_flags += ['-fstack-protector-strong'] + + +if env['os'] != 'windows' : + load_whole_archive = '-Wl,--whole-archive' + noload_whole_archive = '-Wl,--no-whole-archive' +else: + load_whole_archive = '/wholearchive' + noload_whole_archive = '' + +if 'macos' in examples_env['os']: + load_whole_archive = '-Wl,-force_load' + noload_whole_archive = '' + # Build graph examples graph_utils = examples_env.Object("../utils/GraphUtils.cpp") graph_utils += examples_env.Object("../utils/CommonGraphOptions.cpp") @@ -55,9 +74,8 @@ examples_libs = examples_env.get("LIBS",[]) for file in Glob("./graph_*.cpp"): example = os.path.basename(os.path.splitext(str(file))[0]) prog = None - - if env['os'] in ['android', 'bare_metal'] or env['standalone']: - prog = examples_env.Program(example, ["{}.cpp".format(example), utils, graph_utils], LIBS = examples_libs + arm_compute_graph_libs, LINKFLAGS=examples_env["LINKFLAGS"]+['-Wl,--whole-archive',graph_dependency,'-Wl,--no-whole-archive', '-fstack-protector-strong']) + if env['os'] in ['android','windows', 'macos', 'bare_metal'] or env['standalone']: + prog = examples_env.Program(example, ["{}.cpp".format(example), utils, graph_utils], LIBS = examples_libs + arm_compute_graph_libs, LINKFLAGS=examples_env["LINKFLAGS"]+[load_whole_archive, graph_dependency, noload_whole_archive] + extra_link_flags) Depends(prog, graph_dependency) prog = install_bin(prog) else: @@ -69,18 +87,20 @@ for file in Glob("./graph_*.cpp"): Default(alias) if env['opencl'] and env['neon']: + examples_env.Append(CPPDEFINES = 
['ARM_COMPUTE_CL']) for file in Glob("./neoncl_*.cpp"): example = os.path.basename(os.path.splitext(str(file))[0]) - prog = examples_env.Program(example, ["{}.cpp".format(example), utils], CPPDEFINES=['ARM_COMPUTE_CL'], LIBS = examples_libs + arm_compute_libs) + prog = examples_env.Program(example, ["{}.cpp".format(example), utils], LIBS = examples_libs + arm_compute_libs) Depends(prog, arm_compute_dependency) prog = install_bin(prog) alias = examples_env.Alias(example, prog) Default(alias) if env['opencl']: + examples_env.Append(CPPDEFINES = ['ARM_COMPUTE_CL']) for file in Glob("./cl_*.cpp"): example = os.path.basename(os.path.splitext(str(file))[0]) - prog = examples_env.Program(example, ["{}.cpp".format(example), utils], CPPDEFINES=['ARM_COMPUTE_CL'], LIBS = examples_libs + arm_compute_libs) + prog = examples_env.Program(example, ["{}.cpp".format(example), utils], LIBS = examples_libs + arm_compute_libs) Depends(prog, arm_compute_dependency) prog = install_bin(prog) alias = examples_env.Alias(example, prog) @@ -91,9 +111,15 @@ if env['gemm_tuner'] and env['opencl']: for file in Glob("./gemm_tuner/cl_*.cpp"): example = os.path.basename(os.path.splitext(str(file))[0]) example = os.path.join("gemm_tuner", example) - prog = examples_env.Program(example, ["{}.cpp".format(example), utils, gemm_tuner_common_options], CPPDEFINES=['ARM_COMPUTE_CL'], LIBS = examples_libs + arm_compute_libs) - Depends(prog, arm_compute_dependency) - prog = install_bin(prog) + if env['os'] in ['android', 'macos', 'bare_metal'] or env['standalone']: + prog = examples_env.Program(example, ["{}.cpp".format(example), utils, gemm_tuner_common_options], LIBS = examples_libs + arm_compute_graph_libs, LINKFLAGS=examples_env["LINKFLAGS"]+[load_whole_archive, graph_dependency, noload_whole_archive, '-fstack-protector-strong'] ) + Depends(prog, graph_dependency) + prog = install_bin(prog) + else: + #-Wl,--allow-shlib-undefined: Ignore dependencies of dependencies + prog = examples_env.Program(example, ["{}.cpp".format(example), utils, gemm_tuner_common_options], LIBS = examples_libs + arm_compute_graph_libs, LINKFLAGS=examples_env["LINKFLAGS"]+['-Wl,--allow-shlib-undefined'] ) + Depends(prog, graph_dependency) + prog = install_bin(prog) alias = examples_env.Alias(example, prog) Default(alias) @@ -103,7 +129,7 @@ if env['neon']: prog = None if env['os'] in ['bare_metal']: - prog = examples_env.Program(example, ["{}.cpp".format(example), utils], LINKFLAGS=examples_env["LINKFLAGS"]+['-fstack-protector'], LIBS = examples_libs + arm_compute_libs) + prog = examples_env.Program(example, ["{}.cpp".format(example), utils], LINKFLAGS=examples_env["LINKFLAGS"], LIBS = examples_libs + arm_compute_libs) else: prog = examples_env.Program(example, ["{}.cpp".format(example), utils], LIBS = examples_libs + arm_compute_libs) @@ -112,28 +138,19 @@ if env['neon']: alias = examples_env.Alias(example, prog) Default(alias) -if env['gles_compute']: - for file in Glob("./gc_*.cpp"): +if env['external_tests_dir']: + for file in Glob(env['external_tests_dir'] + "/examples/graph_*.cpp"): example = os.path.basename(os.path.splitext(str(file))[0]) - prog = examples_env.Program(example, ["{}.cpp".format(example), utils], CPPDEFINES=['ARM_COMPUTE_GC'], LIBS = examples_libs + arm_compute_libs) - Depends(prog, arm_compute_dependency) - prog = install_bin(prog) + prog = None + + if env['os'] in ['android', 'macos', 'bare_metal'] or env['standalone']: + prog = examples_env.Program(example, [examples_env.Object(source=file, target=example), utils, graph_utils], 
LIBS = examples_libs + arm_compute_graph_libs, LINKFLAGS=examples_env["LINKFLAGS"]+[load_whole_archive, graph_dependency, noload_whole_archive] + extra_link_flags) + Depends(prog, graph_dependency) + prog = install_bin(prog) + else: + #-Wl,--allow-shlib-undefined: Ignore dependencies of dependencies + prog = examples_env.Program(example, [examples_env.Object(source=file, target=example), utils, graph_utils], LIBS = examples_libs + arm_compute_graph_libs, LINKFLAGS=examples_env["LINKFLAGS"]+['-Wl,--allow-shlib-undefined'] ) + Depends(prog, graph_dependency) + prog = install_bin(prog) alias = examples_env.Alias(example, prog) Default(alias) - -#FIXME Delete 3rdparty builds before release -for file in Glob("#3rdparty/examples/graph_*.cpp"): - example = os.path.basename(os.path.splitext(str(file))[0]) - prog = None - - if env['os'] in ['android', 'bare_metal'] or env['standalone']: - prog = examples_env.Program(example, [examples_env.Object(source=file, target=example), utils, graph_utils], LIBS = examples_libs + arm_compute_graph_libs, LINKFLAGS=examples_env["LINKFLAGS"]+['-Wl,--whole-archive',graph_dependency,'-Wl,--no-whole-archive', '-fstack-protector-strong']) - Depends(prog, graph_dependency) - prog = install_bin(prog) - else: - #-Wl,--allow-shlib-undefined: Ignore dependencies of dependencies - prog = examples_env.Program(example, [examples_env.Object(source=file, target=example), utils, graph_utils], LIBS = examples_libs + arm_compute_graph_libs, LINKFLAGS=examples_env["LINKFLAGS"]+['-Wl,--allow-shlib-undefined'] ) - Depends(prog, graph_dependency) - prog = install_bin(prog) - alias = examples_env.Alias(example, prog) - Default(alias) diff --git a/examples/cl_cache.cpp b/examples/cl_cache.cpp index a1a2d2548a..9da5b9176d 100644 --- a/examples/cl_cache.cpp +++ b/examples/cl_cache.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,12 +21,13 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/runtime/CL/CLFunctions.h" - +#include "arm_compute/core/CL/OpenCL.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/CL/CLHelpers.h" #include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/CL/functions/CLPermute.h" #include "arm_compute/runtime/CL/Utils.h" + #include "utils/Utils.h" using namespace arm_compute; @@ -43,14 +44,15 @@ public: bool do_setup(int argc, char **argv) override { - std::cout << "Once the program has run and created the file cache.bin, rerun with --restore_cache." << std::endl; + std::cout << "Once the program has run and created the file cache.bin, rerun with --restore_cache." + << std::endl; CLScheduler::get().default_init(); - if(argc > 1) + if (argc > 1) { std::string argv1 = argv[1]; std::transform(argv1.begin(), argv1.end(), argv1.begin(), ::tolower); - if(argv1 == "--restore_cache") + if (argv1 == "--restore_cache") { // Load the precompiled kernels from a file into the kernel library, in this way the next time they are needed // compilation won't be required. 
@@ -110,11 +112,13 @@ private: window.use_tensor_dimensions(reference.info()->tensor_shape()); Iterator it_ref(&reference, window); Iterator it_res(&result, window); - execute_window_loop(window, [&](const Coordinates &) - { - assert(*reinterpret_cast<unsigned char *>(it_ref.ptr()) == *reinterpret_cast<unsigned char *>(it_res.ptr())); - }, - it_ref, it_res); + execute_window_loop( + window, + [&](const Coordinates &) { + assert(*reinterpret_cast<unsigned char *>(it_ref.ptr()) == + *reinterpret_cast<unsigned char *>(it_res.ptr())); + }, + it_ref, it_res); reference.unmap(); result.unmap(); } @@ -126,11 +130,9 @@ private: window.use_tensor_dimensions(tensor.info()->tensor_shape()); Iterator it_tensor(&tensor, window); unsigned char val(0); - execute_window_loop(window, [&](const Coordinates &) - { - *reinterpret_cast<unsigned char *>(it_tensor.ptr()) = val++; - }, - it_tensor); + execute_window_loop( + window, [&](const Coordinates &) { *reinterpret_cast<unsigned char *>(it_tensor.ptr()) = val++; }, + it_tensor); tensor.unmap(); } void init_tensor(const TensorShape shape, CLTensor &tensor, DataType type, DataLayout layout) diff --git a/examples/cl_convolution.cpp b/examples/cl_convolution.cpp deleted file mode 100644 index f2d19ef0cc..0000000000 --- a/examples/cl_convolution.cpp +++ /dev/null @@ -1,136 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CL /* Needed by Utils.cpp to handle OpenCL exceptions properly */ -#error "This example needs to be built with -DARM_COMPUTE_CL" -#endif /* ARM_COMPUTE_CL */ - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/CLFunctions.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "utils/ImageLoader.h" -#include "utils/Utils.h" - -using namespace arm_compute; -using namespace utils; - -/** Gaussian 3x3 matrix - */ -const std::array<int16_t, 9> gaussian3x3 = -{ - 1, 2, 1, - 2, 4, 2, - 1, 2, 1 -}; - -/** Gaussian 5x5 matrix - */ -const std::array<int16_t, 25> gaussian5x5 = -{ - 1, 4, 6, 4, 1, - 4, 16, 24, 16, 4, - 6, 24, 36, 24, 6, - 4, 16, 24, 16, 4, - 1, 4, 6, 4, 1 -}; - -class CLConvolutionExample : public Example -{ -public: - bool do_setup(int argc, char **argv) override - { - PPMLoader ppm; - - CLScheduler::get().default_init(); - - if(argc < 2) - { - // Print help - std::cout << "Usage: ./build/cl_convolution [input_image.ppm]\n\n"; - std::cout << "No input_image provided, creating a dummy 640x480 image\n"; - // Create an empty grayscale 640x480 image - src.allocator()->init(TensorInfo(640, 480, Format::U8)); - } - else - { - ppm.open(argv[1]); - ppm.init_image(src, Format::U8); - } - - // Configure the temporary and destination images - tmp.allocator()->init(*src.info()); - dst.allocator()->init(*src.info()); - - // Apply a Gaussian 3x3 filter to the source image followed by a Gaussian 5x5: - conv3x3.configure(&src, &tmp, gaussian3x3.data(), 0 /* Let arm_compute calculate the scale */, BorderMode::UNDEFINED); - conv5x5.configure(&tmp, &dst, gaussian5x5.data(), 0 /* Let arm_compute calculate the scale */, BorderMode::UNDEFINED); - - // Allocate all the images - src.allocator()->allocate(); - tmp.allocator()->allocate(); - dst.allocator()->allocate(); - // Fill the input image with the content of the PPM image if a filename was provided: - if(ppm.is_open()) - { - ppm.fill_image(src); - output_filename = std::string(argv[1]) + "_out.ppm"; - } - - return true; - } - void do_run() override - { - // Execute the functions: - conv3x3.run(); - conv5x5.run(); - - // Make sure all the OpenCL jobs are done executing: - CLScheduler::get().sync(); - } - void do_teardown() override - { - // Save the result to file: - if(!output_filename.empty()) - { - save_to_ppm(dst, output_filename); // save_to_ppm maps and unmaps the image to store as PPM - } - } - -private: - CLImage src{}; - CLImage tmp{}; - CLImage dst{}; - CLConvolution3x3 conv3x3{}; - CLConvolution5x5 conv5x5{}; - std::string output_filename{}; -}; - -/** Main program for convolution test - * - * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Path to PPM image to process ) - */ -int main(int argc, char **argv) -{ - return utils::run_example<CLConvolutionExample>(argc, argv); -} diff --git a/examples/cl_events.cpp b/examples/cl_events.cpp deleted file mode 100644 index a9c508ac58..0000000000 --- a/examples/cl_events.cpp +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2017-2018 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CL /* Needed by Utils.cpp to handle OpenCL exceptions properly */ -#error "This example needs to be built with -DARM_COMPUTE_CL" -#endif /* ARM_COMPUTE_CL */ - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/CLFunctions.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "utils/ImageLoader.h" -#include "utils/Utils.h" - -using namespace arm_compute; -using namespace utils; - -class CLEventsExample : public Example -{ -public: - bool do_setup(int argc, char **argv) override - { - /** [OpenCL events] **/ - PPMLoader ppm; - constexpr int scale_factor = 2; - - CLScheduler::get().default_init(); - - if(argc < 2) - { - // Print help - std::cout << "Usage: ./build/cl_events [input_image.ppm]\n\n"; - std::cout << "No input_image provided, creating a dummy 640x480 image\n"; - // Create an empty grayscale 640x480 image - src.allocator()->init(TensorInfo(640, 480, Format::U8)); - } - else - { - ppm.open(argv[1]); - ppm.init_image(src, Format::U8); - } - - TensorInfo dst_info(src.info()->dimension(0) / scale_factor, src.info()->dimension(1) / scale_factor, Format::U8); - - // Configure the temporary and destination images - dst.allocator()->init(dst_info); - tmp_scale_median.allocator()->init(dst_info); - tmp_median_gauss.allocator()->init(dst_info); - - //Configure the functions: - scale.configure(&src, &tmp_scale_median, InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::REPLICATE); - median.configure(&tmp_scale_median, &tmp_median_gauss, BorderMode::REPLICATE); - gauss.configure(&tmp_median_gauss, &dst, BorderMode::REPLICATE); - - // Allocate all the images - src.allocator()->allocate(); - dst.allocator()->allocate(); - tmp_scale_median.allocator()->allocate(); - tmp_median_gauss.allocator()->allocate(); - - // Fill the input image with the content of the PPM image if a filename was provided: - if(ppm.is_open()) - { - ppm.fill_image(src); - output_filename = std::string(argv[1]) + "_out.ppm"; - } - /** [OpenCL events] **/ - - return true; - } - void do_run() override - { - // Enqueue and flush the scale OpenCL kernel: - scale.run(); - // Create a synchronisation event between scale and median: - cl::Event scale_event = CLScheduler::get().enqueue_sync_event(); - // Enqueue and flush the median OpenCL kernel: - median.run(); - // Enqueue and flush the Gaussian OpenCL kernel: - gauss.run(); - - //Make sure all the OpenCL jobs are done 
executing: - scale_event.wait(); // Block until Scale is done executing (Median3x3 and Gaussian5x5 might still be running) - CLScheduler::get().sync(); // Block until Gaussian5x5 is done executing - } - void do_teardown() override - { - // Save the result to file: - if(!output_filename.empty()) - { - save_to_ppm(dst, output_filename); // save_to_ppm maps and unmaps the image to store as PPM - } - } - -private: - CLImage src{}, tmp_scale_median{}, tmp_median_gauss{}, dst{}; - CLScale scale{}; - CLMedian3x3 median{}; - CLGaussian5x5 gauss{}; - std::string output_filename{}; -}; - -/** Main program for convolution test - * - * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Path to PPM image to process ) - */ -int main(int argc, char **argv) -{ - return utils::run_example<CLEventsExample>(argc, argv); -} diff --git a/examples/cl_sgemm.cpp b/examples/cl_sgemm.cpp index 8e0263dde2..68955c52f7 100644 --- a/examples/cl_sgemm.cpp +++ b/examples/cl_sgemm.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2019 ARM Limited. + * Copyright (c) 2017-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,9 +26,10 @@ #endif /* ARM_COMPUTE_CL */ #include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/CLFunctions.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/CL/CLTuner.h" +#include "arm_compute/runtime/CL/functions/CLGEMM.h" + #include "utils/Utils.h" #include <cstdlib> @@ -50,15 +51,16 @@ public: CLScheduler::get().default_init(&tuner); std::ifstream stream; - if(argc > 1) + if (argc > 1) { stream.open(argv[1], std::fstream::in); } - if(argc < 3 || (argc < 4 && stream.bad())) + if (argc < 3 || (argc < 4 && stream.bad())) { // Print help - std::cout << "Usage: 1) ./build/cl_sgemm input_matrix_1.npy input_matrix_2.npy [input_matrix_3.npy] [alpha = 1] [beta = 0]\n"; + std::cout << "Usage: 1) ./build/cl_sgemm input_matrix_1.npy input_matrix_2.npy [input_matrix_3.npy] [alpha " + "= 1] [beta = 0]\n"; std::cout << " 2) ./build/cl_sgemm M N K [alpha = 1.0f] [beta = 0.0f]\n\n"; std::cout << "Too few or no input_matrices provided. 
Using M=7, N=3, K=5, alpha=1.0f and beta=0.0f\n\n"; @@ -68,29 +70,29 @@ public: } else { - if(stream.good()) /* case file1.npy file2.npy [file3.npy] [alpha = 1.0f] [beta = 0.0f] */ + if (stream.good()) /* case file1.npy file2.npy [file3.npy] [alpha = 1.0f] [beta = 0.0f] */ { npy0.open(argv[1]); npy0.init_tensor(src0, DataType::F32); npy1.open(argv[2]); npy1.init_tensor(src1, DataType::F32); - if(argc > 3) + if (argc > 3) { stream.close(); stream.clear(); stream.open(argv[3], std::fstream::in); - if(stream.good()) /* case with third file */ + if (stream.good()) /* case with third file */ { npy2.open(argv[3]); npy2.init_tensor(src2, DataType::F32); - if(argc > 4) + if (argc > 4) { // Convert string to float alpha = strtof(argv[4], nullptr); - if(argc > 5) + if (argc > 5) { // Convert string to float beta = strtof(argv[5], nullptr); @@ -101,7 +103,7 @@ public: { alpha = strtof(argv[3], nullptr); - if(argc > 4) + if (argc > 4) { beta = strtof(argv[4], nullptr); } @@ -118,11 +120,11 @@ public: src1.allocator()->init(TensorInfo(TensorShape(N, K), 1, DataType::F32)); src2.allocator()->init(TensorInfo(TensorShape(N, M), 1, DataType::F32)); - if(argc > 4) + if (argc > 4) { alpha = strtof(argv[4], nullptr); - if(argc > 5) + if (argc > 5) { beta = strtof(argv[5], nullptr); } @@ -141,7 +143,7 @@ public: dst.allocator()->allocate(); // Fill the input images with either the data provided or random data - if(npy0.is_open()) + if (npy0.is_open()) { npy0.fill_tensor(src0); npy1.fill_tensor(src1); @@ -149,7 +151,7 @@ public: output_filename = "sgemm_out.npy"; is_fortran = npy0.is_fortran(); - if(npy2.is_open()) + if (npy2.is_open()) { src2.allocator()->allocate(); npy2.fill_tensor(src2); @@ -179,7 +181,7 @@ public: } void do_teardown() override { - if(!output_filename.empty()) /* Save to .npy file */ + if (!output_filename.empty()) /* Save to .npy file */ { save_to_npy(dst, output_filename, is_fortran); } diff --git a/examples/gc_absdiff.cpp b/examples/gc_absdiff.cpp deleted file mode 100644 index 6793df040f..0000000000 --- a/examples/gc_absdiff.cpp +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef ARM_COMPUTE_GC /* Needed by Utils.cpp to handle OpenGL ES exceptions properly */ -#error "This example needs to be built with -DARM_COMPUTE_GC" -#endif /* ARM_COMPUTE_GC */ - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/GLES_COMPUTE/GCFunctions.h" -#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" -#include "utils/ImageLoader.h" -#include "utils/Utils.h" - -using namespace arm_compute; -using namespace utils; - -class GCAbsDiffExample : public Example -{ -public: - bool do_setup(int argc, char **argv) override - { - PPMLoader ppm1{}; - PPMLoader ppm2{}; - - GCScheduler::get().default_init(); - if(argc < 2) - { - // Print help - std::cout << "Usage: " << argv[0] << " [input0_image.ppm] [input1_image.ppm] \n\n"; - std::cout << "No input_image provided, creating two dummy 640x480 images\n"; - // Create two empty grayscale 640x480 images - src1.allocator()->init(TensorInfo(640, 480, Format::U8)); - src2.allocator()->init(TensorInfo(640, 480, Format::U8)); - } - else if(argc < 3) - { - // Print help - std::cout << "Usage: " << argv[0] << " [input0_image.ppm] [input1_image.ppm] \n\n"; - std::cout << "Only one input_image provided, creating a dummy 640x480 image\n"; - ppm1.open(argv[1]); - ppm1.init_image(src1, Format::U8); - // Create an empty grayscale 640x480 image - src2.allocator()->init(TensorInfo(640, 480, Format::U8)); - } - else - { - ppm1.open(argv[1]); - ppm1.init_image(src1, Format::U8); - ppm2.open(argv[2]); - ppm2.init_image(src2, Format::U8); - } - - // Configure the temporary and destination images - dst.allocator()->init(*src1.info()); - - absdiff.configure(&src1, &src2, &dst); - - // Allocate all the images - src1.allocator()->allocate(); - src2.allocator()->allocate(); - dst.allocator()->allocate(); - - // Fill the input image with the content of the PPM image if a filename was provided: - if(ppm1.is_open()) - { - ppm1.fill_image(src1); - output_filename = std::string(argv[1]) + "_out.ppm"; - } - if(ppm2.is_open()) - { - ppm2.fill_image(src2); - } - - return true; - } - void do_run() override - { - // Execute the functions: - absdiff.run(); - } - void do_teardown() override - { - // Save the result to file: - if(!output_filename.empty()) - { - // save_to_ppm maps and unmaps the image to store as PPM - // The GCTensor::map call inside the save_to_ppm will block until all pending operations on that image have completed - save_to_ppm(dst, output_filename); - } - } - -private: - GCImage src1{}, src2{}, dst{}; - GCAbsoluteDifference absdiff{}; - std::string output_filename{}; -}; - -/** Main program for absdiff test - * - * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Path to the first PPM image to process, [optional] Path the the second PPM image to process ) - */ -int main(int argc, char **argv) -{ - return utils::run_example<GCAbsDiffExample>(argc, argv); -} diff --git a/examples/gc_dc.cpp b/examples/gc_dc.cpp deleted file mode 100644 index 6d09eba87c..0000000000 --- a/examples/gc_dc.cpp +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (c) 2017-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_GC -#error "This example needs to be built with -DARM_COMPUTE_GC" -#endif /* ARM_COMPUTE_GC */ - -#include "arm_compute/runtime/GLES_COMPUTE/GCFunctions.h" -#include "arm_compute/runtime/GLES_COMPUTE/GCScheduler.h" -#include "half/half.hpp" -#include "utils/Utils.h" - -using namespace arm_compute; -using namespace utils; - -class GCDCExample : public Example -{ -public: - bool do_setup(int argc, char **argv) override - { - ARM_COMPUTE_UNUSED(argc); - ARM_COMPUTE_UNUSED(argv); - - // init instance - GCScheduler::get().default_init(); - - const TensorShape src_shape = TensorShape{ 11U /* W */, 13U /* H */, 4U /* C */, 3U /* N */ }; - const unsigned int kernel_size = 3; - const int stride_x = 1; - const int stride_y = 1; - const int pad_x = 0; - const int pad_y = 0; - const unsigned int num_kernels = 256; - const DataType data_type = DataType::F16; - - // generate shape - const TensorShape weights_shape(kernel_size, kernel_size, src_shape.z(), num_kernels); - const TensorShape bias_shape(num_kernels); - const PadStrideInfo pad_info(stride_x, stride_y, pad_x, pad_y, DimensionRoundingType::FLOOR); - - // output shape should be 9*11*256*3 (W*H*C*N) - const TensorShape dst_shape = get_output_shape(src_shape, weights_shape, pad_info); - - // create tensors - src.allocator()->init(TensorInfo(src_shape, 1, data_type)); - weights.allocator()->init(TensorInfo(weights_shape, 1, data_type)); - bias.allocator()->init(TensorInfo(bias_shape, 1, data_type)); - dst.allocator()->init(TensorInfo(dst_shape, 1, data_type)); - - // configure layer - conv.configure(&src, &weights, &bias, &dst, pad_info); - - // allocate tensors - src.allocator()->allocate(); - weights.allocator()->allocate(); - bias.allocator()->allocate(); - dst.allocator()->allocate(); - - // To demonstrate how to fill tensor with some values... 
- src.map(); - Window window; - window.use_tensor_dimensions(src_shape); - Iterator it(&src, window); - execute_window_loop(window, [&](const Coordinates &) - { - *reinterpret_cast<half_float::half *>(it.ptr()) = half_float::half(1.f); - }); - src.unmap(); - - return true; - } - void do_run() override - { - // run the layer - conv.run(); - } - void do_teardown() override - { - // check result - dst.map(); - // do something - dst.unmap(); - } - -private: - GCTensor src{}, weights{}, bias{}, dst{}; - GCDirectConvolutionLayer conv{}; - - TensorShape get_output_shape(TensorShape in_shape, TensorShape kernel_shape, const PadStrideInfo &info) - { - TensorShape out_shape(in_shape); - const std::pair<unsigned int, unsigned int> scaled_dims = scaled_dimensions(in_shape.x(), - in_shape.y(), - kernel_shape.x(), - kernel_shape.y(), - info); - out_shape.set(0, scaled_dims.first); - out_shape.set(1, scaled_dims.second); - out_shape.set(2, kernel_shape[3]); - return out_shape; - } -}; - -/** Main program for directconvolution test - * - * @param[in] argc Number of arguments - * @param[in] argv Arguments - */ -int main(int argc, char **argv) -{ - return utils::run_example<GCDCExample>(argc, argv); -} diff --git a/examples/gemm_tuner/CommonGemmExampleOptions.cpp b/examples/gemm_tuner/CommonGemmExampleOptions.cpp index a93d0191b3..c2a465604a 100644 --- a/examples/gemm_tuner/CommonGemmExampleOptions.cpp +++ b/examples/gemm_tuner/CommonGemmExampleOptions.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -34,30 +34,53 @@ using namespace utils; os << "N : " << common_params.N << std::endl; os << "K : " << common_params.K << std::endl; os << "B : " << common_params.B << std::endl; + os << "Data type : " << common_params.data_type << std::endl; + os << "OpenCL tuner mode : " << common_params.tuner_mode << std::endl; return os; } -CommonGemmExampleOptions::CommonGemmExampleOptions(CommandLineParser &parser) +CommonGemmExampleOptions::CommonGemmExampleOptions(arm_compute::utils::CommandLineParser &parser, + arm_compute::DataType default_data_type) : help(parser.add_option<ToggleOption>("help")), M(parser.add_positional_option<SimpleOption<size_t>>("M", 100)), N(parser.add_positional_option<SimpleOption<size_t>>("N", 100)), K(parser.add_positional_option<SimpleOption<size_t>>("K", 50)), - B(parser.add_positional_option<SimpleOption<size_t>>("B", 1)) + B(parser.add_positional_option<SimpleOption<size_t>>("B", 1)), + data_type(), + tuner_mode() { + const std::set<DataType> supported_data_types{ + DataType::F16, + DataType::F32, + DataType::QASYMM8, + }; + + const std::set<CLTunerMode> supported_tuner_modes{CLTunerMode::EXHAUSTIVE, CLTunerMode::NORMAL, CLTunerMode::RAPID}; + + ARM_COMPUTE_ERROR_ON_MSG(supported_data_types.find(default_data_type) == supported_data_types.end(), + "Default data type unsupported"); + + data_type = parser.add_option<EnumOption<DataType>>("type", supported_data_types, default_data_type); + tuner_mode = parser.add_option<EnumOption<CLTunerMode>>("tuner-mode", supported_tuner_modes, CLTunerMode::RAPID); + help->set_help("Show this help message."); M->set_help("Number of lhs matrix rows."); N->set_help("Number of rhs matrix columns."); K->set_help("Number of lhs matrix columns/rhs matrix rows."); B->set_help("Batch size."); + data_type->set_help("Data type to use"); + tuner_mode->set_help("OpenCL tuner mode"); } CommonGemmExampleParams consume_common_gemm_example_parameters(const CommonGemmExampleOptions 
&options) { CommonGemmExampleParams common_params; - common_params.M = options.M->value(); - common_params.N = options.N->value(); - common_params.K = options.K->value(); - common_params.B = options.B->value(); + common_params.M = options.M->value(); + common_params.N = options.N->value(); + common_params.K = options.K->value(); + common_params.B = options.B->value(); + common_params.data_type = options.data_type->value(); + common_params.tuner_mode = options.tuner_mode->value(); return common_params; } } // namespace gemm_tuner diff --git a/examples/gemm_tuner/CommonGemmExampleOptions.h b/examples/gemm_tuner/CommonGemmExampleOptions.h index 5f079abfc1..38178bcef8 100644 --- a/examples/gemm_tuner/CommonGemmExampleOptions.h +++ b/examples/gemm_tuner/CommonGemmExampleOptions.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,18 +24,25 @@ #ifndef ARM_COMPUTE_EXAMPLES_GEMM_TUNER_COMMON_GEMM_EXAMPLE_OPTIONS #define ARM_COMPUTE_EXAMPLES_GEMM_TUNER_COMMON_GEMM_EXAMPLE_OPTIONS +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Utils.h" +#include "arm_compute/runtime/CL/CLTuner.h" + #include "utils/command_line/CommandLineOptions.h" #include "utils/command_line/CommandLineParser.h" +#include "utils/TypePrinter.h" namespace gemm_tuner { /** Structure holding all the common gemm example parameters */ struct CommonGemmExampleParams { - size_t M{ 100 }; /**< Number of lhs matrix rows */ - size_t N{ 100 }; /**< Number of rhs matrix columns */ - size_t K{ 50 }; /**< Number of lhs matrix columns/rhs matrix rows */ - size_t B{ 1 }; /**< Batch size */ + size_t M{100}; /**< Number of lhs matrix rows */ + size_t N{100}; /**< Number of rhs matrix columns */ + size_t K{50}; /**< Number of lhs matrix columns/rhs matrix rows */ + size_t B{1}; /**< Batch size */ + arm_compute::DataType data_type{arm_compute::DataType::F32}; /**< Data type */ + arm_compute::CLTunerMode tuner_mode{arm_compute::CLTunerMode::RAPID}; /**< OpenCL tuner mode */ }; /** Formatted output of the CommonGemmExampleParams type @@ -61,9 +68,11 @@ class CommonGemmExampleOptions public: /** Constructor * - * @param[in,out] parser A parser on which "parse()" hasn't been called yet. + * @param[in,out] parser A parser on which "parse()" hasn't been called yet. + * @param[in] default_data_type Default data type if unspecified. 
*/ - CommonGemmExampleOptions(arm_compute::utils::CommandLineParser &parser); + CommonGemmExampleOptions(arm_compute::utils::CommandLineParser &parser, + arm_compute::DataType default_data_type = arm_compute::DataType::F32); /** Prevent instances of this class from being copied (As this class contains pointers) */ CommonGemmExampleOptions(const CommonGemmExampleOptions &) = delete; /** Prevent instances of this class from being copied (As this class contains pointers) */ @@ -80,6 +89,8 @@ public: arm_compute::utils::SimpleOption<size_t> *N; /**< Number of rhs matrix columns option */ arm_compute::utils::SimpleOption<size_t> *K; /**< Number of lhs matrix columns/rhs matrix rows option */ arm_compute::utils::SimpleOption<size_t> *B; /**< Batch size option */ + arm_compute::utils::EnumOption<arm_compute::DataType> *data_type; /**< Data type */ + arm_compute::utils::EnumOption<arm_compute::CLTunerMode> *tuner_mode; /**< OpenCL tuner mode */ }; /** Consumes the common gemm example options and creates a structure containing all information diff --git a/examples/gemm_tuner/GemmTuner.py b/examples/gemm_tuner/GemmTuner.py index 29c414cfe8..ef1f31493e 100644 --- a/examples/gemm_tuner/GemmTuner.py +++ b/examples/gemm_tuner/GemmTuner.py @@ -1,4 +1,4 @@ -# Copyright (c) 2019 ARM Limited. +# Copyright (c) 2019-2020 Arm Limited. # # SPDX-License-Identifier: MIT # @@ -41,18 +41,21 @@ from typing import Deque, Dict, Generator, List, NamedTuple, Set, Tuple, Union Strategy = Enum("Strategy", ["Native", "ReshapedOnlyRHS", "Reshaped"]) # Gemm parameter + + class GEMMParam(NamedTuple): M: int # Number of lhs matrix rows N: int # Number of rhs matrix columns K: int # Number of lhs matrix columns/rhs matrix rows B: int # Batch size + data_type: str # Data type - @staticmethod - def parse_from_strs(*args): - return GEMMParam(*map(int, args)) + @classmethod + def parse_from_strs(cls, *M_N_K_B, data_type): + return cls(*map(int, M_N_K_B), str(data_type)) def __str__(self): - return "-".join(map(str, self)) + return ",".join(map(str, self)) # Gemm configuration for strategy Native @@ -61,13 +64,13 @@ class NativeGEMMConfig(NamedTuple): n0: int # Number of columns processed by the matrix multiplication k0: int # Number of partial accumulations performed by the matrix multiplication - @staticmethod - def parse_from_strs(*args): - *mnk, = map(int, args) - return NativeGEMMConfig(*mnk) + @classmethod + def parse_from_strs(cls, *args): + (*mnk,) = map(int, args) + return cls(*mnk) def __str__(self): - return "-".join(map(str, self)) + return ",".join(map(str, self)) # Gemm configuration for strategy Reshaped Only RHS @@ -75,19 +78,25 @@ class ReshapedOnlyRHSGEMMConfig(NamedTuple): m0: int # Number of rows processed by the matrix multiplication n0: int # Number of columns processed by the matrix multiplication k0: int # Number of partial accumulations performed by the matrix multiplication - h0: int # Number of horizontal blocks of size (k0xn0) stored on the same output row - interleave_rhs: bool # Interleave rhs matrix (1) / Do not interleave rhs matrix (0) - transpose_rhs: bool # Transpose rhs matrix but not lhs matrix (1) / Do not transpose rhs matrix but do transpose lhs matrix (0) - - @staticmethod - def parse_from_strs(*args): - *mnkh, interleave_rhs, transpose_rhs = map(int, args) + # Number of horizontal blocks of size (k0xn0) stored on the same output row + h0: int + # Interleave rhs matrix (1) / Do not interleave rhs matrix (0) + interleave_rhs: bool + # Transpose rhs matrix but not lhs matrix (1) / Do not 
transpose rhs matrix but do transpose lhs matrix (0) + transpose_rhs: bool + # Export rhs matrix to cl_image (1) / Do not export rhs matrix to cl_image (0) + export_to_cl_image_rhs: bool + + @classmethod + def parse_from_strs(cls, *args): + (*mnkh, interleave_rhs, transpose_rhs, export_to_cl_image_rhs,) = map(int, args) interleave_rhs = interleave_rhs == 1 transpose_rhs = transpose_rhs == 1 - return ReshapedOnlyRHSGEMMConfig(*mnkh, interleave_rhs, transpose_rhs) + export_to_cl_image_rhs = export_to_cl_image_rhs == 1 + return cls(*mnkh, interleave_rhs, transpose_rhs, export_to_cl_image_rhs) def __str__(self): - return "-".join(map(str, self)) + return ",".join(map(str, self)) # Gemm configuration for strategy Reshaped @@ -95,55 +104,90 @@ class ReshapedGEMMConfig(NamedTuple): m0: int # Number of rows processed by the matrix multiplication n0: int # Number of columns processed by the matrix multiplication k0: int # Number of partial accumulations performed by the matrix multiplication - v0: int # Number of vertical blocks of size (m0xk0) stored on the same output row - h0: int # Number of horizontal blocks of size (k0xn0) stored on the same output row - interleave_lhs: bool # Interleave lhs matrix (1) / Do not interleave lhs matrix (0) - interleave_rhs: bool # Interleave rhs matrix (1) / Do not interleave rhs matrix (0) - transpose_rhs: bool # Transpose rhs matrix but not lhs matrix (1) / Do not transpose rhs matrix but do transpose lhs matrix (0) - - @staticmethod - def parse_from_strs(*args): - *mnkvh, interleave_lhs, interleave_rhs, transpose_rhs = map(int, args) + # Number of vertical blocks of size (m0xk0) stored on the same output row + v0: int + # Number of horizontal blocks of size (k0xn0) stored on the same output row + h0: int + # Interleave lhs matrix (1) / Do not interleave lhs matrix (0) + interleave_lhs: bool + # Interleave rhs matrix (1) / Do not interleave rhs matrix (0) + interleave_rhs: bool + # Transpose rhs matrix but not lhs matrix (1) / Do not transpose rhs matrix but do transpose lhs matrix (0) + transpose_rhs: bool + # Export rhs matrix to cl_image (1) / Do not export rhs matrix to cl_image (0) + export_to_cl_image_rhs: bool + + @classmethod + def parse_from_strs(cls, *args): + (*mnkvh, interleave_lhs, interleave_rhs, transpose_rhs, export_to_cl_image_rhs,) = map(int, args) interleave_lhs = interleave_lhs == 1 interleave_rhs = interleave_rhs == 1 transpose_rhs = transpose_rhs == 1 - return ReshapedGEMMConfig(*mnkvh, interleave_lhs, interleave_rhs, transpose_rhs) + export_to_cl_image_rhs = export_to_cl_image_rhs == 1 + return cls(*mnkvh, interleave_lhs, interleave_rhs, transpose_rhs, export_to_cl_image_rhs) def __str__(self): - return "-".join(map(str, self)) + return ",".join(map(str, self)) # Measurement we take from the benchmark result. 
class Measurement(NamedTuple): - opencl_timer_ms: float + opencl_timer_ms_reshape: float + opencl_timer_ms_kernel: float + + def get_total_ms(self): + return self.opencl_timer_ms_reshape + self.opencl_timer_ms_kernel def is_close_to(self, other, tol): - return math.fabs(self.opencl_timer_ms - other.opencl_timer_ms) < tol + return math.fabs(self.get_total_ms() - other.get_total_ms()) < tol def is_better_than(self, other, tol): - return self < other and not self.is_close_to(other) + return self.get_total_ms() < other.get_total_ms() and not self.is_close_to( + other + ) def __add__(self, other): - return Measurement(self.opencl_timer_ms + other.opencl_timer_ms) + return Measurement( + self.opencl_timer_ms_reshape + other.opencl_timer_ms_reshape, + self.opencl_timer_ms_kernel + other.opencl_timer_ms_kernel, + ) def __sub__(self, other): - return Measurement(self.opencl_timer_ms - other.opencl_timer_ms) + return Measurement( + self.opencl_timer_ms_reshape - other.opencl_timer_ms_reshape, + self.opencl_timer_ms_kernel - other.opencl_timer_ms_kernel, + ) def __mul__(self, other): - return Measurement(self.opencl_timer_ms * other.opencl_timer_ms) + return Measurement( + self.opencl_timer_ms_reshape * other.opencl_timer_ms_reshape, + self.opencl_timer_ms_kernel * other.opencl_timer_ms_kernel, + ) def __floordiv__(self, other): - return Measurement(self.opencl_timer_ms // other.opencl_timer_ms) + return Measurement( + self.opencl_timer_ms_reshape // other.opencl_timer_ms_reshape, + self.opencl_timer_ms_kernel // other.opencl_timer_ms_kernel, + ) def __truediv__(self, other): - return Measurement(self.opencl_timer_ms / other.opencl_timer_ms) + return Measurement( + self.opencl_timer_ms_reshape / other.opencl_timer_ms_reshape, + self.opencl_timer_ms_kernel / other.opencl_timer_ms_kernel, + ) def __pow__(self, power): - return Measurement(self.opencl_timer_ms ** power) + return Measurement( + self.opencl_timer_ms_reshape ** power, self.opencl_timer_ms_kernel ** power + ) + + def __str__(self): + return ",".join(map(str, self)) # GEMMConfig Type -GEMMConfigT = Union[NativeGEMMConfig, ReshapedOnlyRHSGEMMConfig, ReshapedGEMMConfig] +GEMMConfigT = Union[NativeGEMMConfig, + ReshapedOnlyRHSGEMMConfig, ReshapedGEMMConfig] # Representation of the benchmark result from a single experiment @@ -154,24 +198,6 @@ class BenchmarkResult(NamedTuple): measurement: Measurement -# Representation of a single row of BenchmarkResult in CSV -# NOTE: In the CSV representation, we merge all fields of Gemm Config into a single field "GEMMConfig", but keep the -# fields of GEMMParam and Measurement -# The example entry including header would look like: -# M , N , K , B, Strategy , GEMMConfig , OpenCLTimer_MS -# 1225, 32, 192, 1, Reshaped , 4-4-4-3-1-1-1-0 , 0.3309 -BenchmarkResultCSVRow = namedtuple( - "BenchmarkResultCSVRow", GEMMParam._fields + ("Strategy", "GEMMConfig") + Measurement._fields -) - - -def benchmark_result_2_csv_row(result: BenchmarkResult) -> BenchmarkResultCSVRow: - """ Convert a BenchmarkResult into its CSV row form """ - return BenchmarkResultCSVRow( - *(result.gemm_param + (result.strategy.name, str(result.gemm_config)) + result.measurement) - ) - - class GEMMBenchmarkResultRecorder: """ A recorder that records and organises GEMM Benchmark results, and produces various reports on the record. 
""" @@ -210,7 +236,9 @@ class GEMMBenchmarkResultRecorder: best_gc_set = best_gc_sets.setdefault((gemm_param, strategy), []) best_gc_set.append((gemm_config, measurement)) # Sort the best config set (list) - best_gc_set = sorted(best_gc_set, key=lambda gc_and_m: gc_and_m[1]) + best_gc_set = sorted( + best_gc_set, key=lambda gc_and_m: gc_and_m[1].get_total_ms() + ) # Filter out configs that are beyond tolerance to the best GEMMConfig's measurement best_gc, best_m = best_gc_set[0] best_gc_set_new = [ @@ -228,9 +256,14 @@ class GEMMBenchmarkResultRecorder: """ Get the best GEMMConfig set per GEMMParam per Strategy, and flatten the result into a sequence of BenchmarkResults """ - for (gemm_param, strategy), best_gc_sets in self.get_best_gemm_configs().items(): + for ( + (gemm_param, strategy), + best_gc_sets, + ) in self.get_best_gemm_configs().items(): for best_gemm_config, best_measurement in best_gc_sets: - yield BenchmarkResult(gemm_param, strategy, best_gemm_config, best_measurement) + yield BenchmarkResult( + gemm_param, strategy, best_gemm_config, best_measurement + ) def get_config_distributions(self): """ Return GEMMConfigDistribution for each strategy @@ -244,38 +277,72 @@ class GEMMBenchmarkResultRecorder: return gemm_config_distributions - def save_to_csvs(self, out_dir, only_best_config=True): - """ Save records to an output directory of csv files. - The directory is organized such that each strategy gets its own CSV file. + def get_best_gemm_strategies(self): + """ Get the best Stratey per GEMMParam + """ + all_results: Dict[GEMMParam, List[Tuple[Strategy, Measurement]]] = defaultdict( + list + ) + + best_strategies: Dict[GEMMParam, Strategy] = {} + + for gemm_param, strategy, gemm_config, measurement in self.get_record(): + all_results[gemm_param].append((strategy, measurement)) + + for gemm_param, results_set in all_results.items(): + # Sort the best results set (list) + results_set = sorted( + results_set, key=lambda s_and_m: s_and_m[1].get_total_ms() + ) + # Select best Strategy + best_s, best_m = results_set[0] + best_strategies[gemm_param] = best_s + + return best_strategies + + def save_to_jsons(self, out_dir, only_best_config=True): + """ Save records to an output directory of JSON files. + The directory is organized such that each strategy gets its own JSON file. + The directory also includes a JSON file to define the best strategy per GEMM Param. """ if not os.path.exists(out_dir): - logging.info("Output directory {} does not exist. Creating...".format(out_dir)) + logging.info( + "Output directory {} does not exist. Creating...".format( + out_dir) + ) os.mkdir(out_dir) + + out_json_path = os.path.join(out_dir, "gemm_type_selection.json") + if check_out_path(out_json_path): + results = self.get_best_gemm_strategies() + results = {str(key): value.name for key, value in results.items()} + dump_json(out_json_path, results) + for strategy in self._strategies: - out_csv_path = os.path.join(out_dir, strategy.name) - if os.path.exists(out_csv_path): - overwrite = ( - input( - "Output CSV {} already exists. Overwrite? 
[Y/N]: ".format(out_csv_path) - ).lower() - == "y" - ) - if not overwrite: - logging.info("Skipping {}".format(out_csv_path)) - continue - logging.info("Saving csv file to {}".format(out_csv_path)) - record = ( - self.get_best_gemm_configs_as_sequence() if only_best_config else self.get_record() + out_json_path = os.path.join( + out_dir, ("gemm_config_" + strategy.name.lower() + ".json") ) - with open(out_csv_path, "w") as f: - csv_writer = csv.DictWriter(f, fieldnames=BenchmarkResultCSVRow._fields) - csv_writer.writeheader() - csv_writer.writerows( - benchmark_result_2_csv_row(res)._asdict() - for res in record - if res.strategy == strategy + if check_out_path(out_json_path): + record = ( + self.get_best_gemm_configs_as_sequence() + if only_best_config + else self.get_record() ) - logging.info("Saved") + results = defaultdict(list) + for res in record: + if res.strategy == strategy: + results[str(res.gemm_param)].append( + { + "GEMMConfig": str(res.gemm_config), + "OpenCL_Timer_ms_reshape": str( + res.measurement.opencl_timer_ms_reshape + ), + "OpenCL_Timer_ms_kernel": str( + res.measurement.opencl_timer_ms_kernel + ), + } + ) + dump_json(out_json_path, results) def summary(self, sum_level=SummaryLevel.Short): """ Return the summary string of the record @@ -314,9 +381,9 @@ class GEMMConfigDistribution: def __init__(self): """ Initializer """ - self._gemm_config_dist: Dict[GEMMConfig, List[Tuple[GEMMParam, Measurement]]] = defaultdict( - list - ) + self._gemm_config_dist: Dict[ + GEMMConfig, List[Tuple[GEMMParam, Measurement]] + ] = defaultdict(list) self._gemm_config_freq = Counter() def add(self, benchmark_result: BenchmarkResult): @@ -376,14 +443,15 @@ EXAMPLE_FILE_2_STRATEGY = { # GEMMParam + GEMMConfig # in that order. # For example, the example args of running a reshaped rhs only example could be: -# 100,100,100,1, 4, 4, 4, 1, 1, 1 -# M ,N ,K, B,m0,n0,k0,h0,interleave_rhs,transpose_rhs -# <-GEMMParam-><-------------GEMMConfig--------------> +# 100,100,100,1, 4, 4, 4, 1, 1, 1, 0 +# M ,N ,K, B,m0,n0,k0,h0,interleave_rhs,transpose_rhs,export_to_cl_image_rhs +# <-GEMMParam-><-------------GEMMConfig---------------------------------------> # Note that the test strategy_name == strategy.name is in place to avoid unwanted enum aliases GEMM_EXAMPLE_ARGS_FACTORY = { + # We ignore the data type field from GEMMParam as that is extracted separately strategy: namedtuple( "{}_Gemm_Example_Args".format(strategy_name), - GEMMParam._fields + GEMM_CONFIG_FACTORY[strategy]._fields, + GEMMParam._fields[:-1] + GEMM_CONFIG_FACTORY[strategy]._fields, ) for strategy_name, strategy in Strategy.__members__.items() if strategy_name == strategy.name @@ -398,8 +466,11 @@ BENCHMARK_RESULT_JSON_EXTENSION = "gemmtuner_benchmark" def parse_benchmark_commandline(commandline: str) -> Dict[str, str]: - """ Parse the benchmark example command-line string into a dictionary of command-line agruments + """ Parse the benchmark example command-line string into a dictionary of command-line arguments """ + # Separate the data type option from the example_args portion of the string + commandline = commandline.replace(",--type=", " --type=") + args = commandline.split() # Discard program name args = args[1:] @@ -439,30 +510,47 @@ def extract_benchmark_results( # Get gemm params + gemm configs from example args benchmark_args = parse_benchmark_commandline(json_res["CommandLine"]) Gemm_Example_Args_T = GEMM_EXAMPLE_ARGS_FACTORY[strategy] - example_args = Gemm_Example_Args_T(*(benchmark_args["example_args"].split(","))) + 
example_args = Gemm_Example_Args_T( + *(benchmark_args["example_args"].split(","))) # Gemm_Example_Arg consists of GEMMParam first and then GEMMConfig (in that order) - gemm_param_fields_len = len(GEMMParam._fields) - gemm_param = GEMMParam.parse_from_strs(*example_args[:gemm_param_fields_len]) + # However data type option is parsed separately from end of options, hence -1 is applied to fields length + gemm_param_fields_len = len(GEMMParam._fields) - 1 + gemm_param = GEMMParam.parse_from_strs( + *example_args[:gemm_param_fields_len], + data_type = benchmark_args["type"]) GEMMConfig = GEMM_CONFIG_FACTORY[strategy] - gemm_config = GEMMConfig.parse_from_strs(*example_args[gemm_param_fields_len:]) + gemm_config = GEMMConfig.parse_from_strs( + *example_args[gemm_param_fields_len:]) # Get OpenCL_Time_Ms stats measurements = list(example_test_data["measurements"].items()) - # There should only be 1 instrument per run - assert len(measurements) == 1 - measurement_instrument, data = measurements.pop() - # Get instrument name and assert that it is the one we expect - measurement_instrument_name = measurement_instrument.split("/")[0] - assert measurement_instrument_name == "OpenCLTimer" - # Take either the minimum or the average of the raw data as the measurement value - if measurement_method == "min": - measurement_val = min(data["raw"]) - elif measurement_method == "avg": - measurement_val = sum(data["raw"]) / len(data["raw"]) - else: - raise ValueError("Invalid measurement method: {}".format(measurement_method)) - - measurement = Measurement(measurement_val) + # For reshaped RHS only we have two measurements (one also for the reshape kernel) + # Hence we must parse and sum them + measurement_ms_reshape = 0 + measurement_ms_kernel = 0 + for single_measurement in measurements: + measurement_instrument, data = single_measurement + # Get instrument name and assert that it is the one we expect + measurement_instrument_name = measurement_instrument.split("/")[0] + assert measurement_instrument_name == "OpenCLTimer" + # Take either the minimum or the average of the raw data as the measurement value + if measurement_method == "min": + measurement_val = min(data["raw"]) + elif measurement_method == "avg": + measurement_val = sum(data["raw"]) / len(data["raw"]) + else: + raise ValueError( + "Invalid measurement method: {}".format(measurement_method) + ) + + measurement_type = measurement_instrument.split("/")[1] + if "reshape" in measurement_type.split("_"): + measurement_ms_reshape = measurement_val + else: + measurement_ms_kernel = measurement_val + + measurement = Measurement( + measurement_ms_reshape, measurement_ms_kernel) yield BenchmarkResult(gemm_param, strategy, gemm_config, measurement) @@ -475,15 +563,42 @@ def parse_json(dir_name): yield json.load(res_fp) +def check_out_path(out_path): + if os.path.exists(out_path): + overwrite = ( + input( + "Output JSON {} already exists. Overwrite? 
[Y/N]: ".format( + out_path) + ).lower() + == "y" + ) + if not overwrite: + logging.info("Skipping {}".format(out_path)) + return False + logging.info("Saving JSON file to {}".format(out_path)) + return True + + +def dump_json(out_path, dict): + with open(out_path, "w") as f: + json.dump(dict, f) + logging.info("Saved") + + ################################################################################ # Main ################################################################################ def main(args): - logging.info("Searching best gemm configurations from {}".format(args.benchmark_results_dir)) + logging.info( + "Searching best gemm configurations from {}".format( + args.benchmark_results_dir) + ) - benchmark_results = extract_benchmark_results(parse_json(args.benchmark_results_dir)) + benchmark_results = extract_benchmark_results( + parse_json(args.benchmark_results_dir) + ) # Add all benchmark results to the recorder benchmark_result_recorder = GEMMBenchmarkResultRecorder(tol=args.tolerance) @@ -496,7 +611,8 @@ def main(args): recorder_sum_level = GEMMBenchmarkResultRecorder.SummaryLevel.Short # Print overall summary of the recorded results - logging.info(benchmark_result_recorder.summary(sum_level=recorder_sum_level)) + logging.info(benchmark_result_recorder.summary( + sum_level=recorder_sum_level)) # Get GEMM configuration distributions for each strategy all_config_dists = benchmark_result_recorder.get_config_distributions() @@ -508,12 +624,16 @@ def main(args): for config, freq in config_dist.frequency(): logging.debug("{}, {}".format(config, freq)) logging.info( - "Best GEMM Config: {} with std: {}".format(config_dist.best_config(), config_dist.std()) + "Best GEMM Config: {} with std: {}".format( + config_dist.best_config(), config_dist.std() + ) ) - # Save the recorded results to csv files in output directory + # Save the recorded results to JSON files in output directory if args.output_dir is not None: - benchmark_result_recorder.save_to_csvs(args.output_dir, only_best_config=(not args.debug)) + benchmark_result_recorder.save_to_jsons( + args.output_dir, only_best_config=(not args.debug) + ) if __name__ == "__main__": @@ -538,7 +658,7 @@ if __name__ == "__main__": metavar="PATH", action="store", type=str, - help="Path to directory that holds output csv files. One per strategy", + help="Path to directory that holds output JSON files. One for strategy selection and one per strategy for GEMM config selection", ) parser.add_argument( "-t", @@ -550,7 +670,11 @@ if __name__ == "__main__": milliseconds. Recommended value: <= 0.1 ms", ) parser.add_argument( - "-D", "--debug", dest="debug", action="store_true", help="Enable script debugging output" + "-D", + "--debug", + dest="debug", + action="store_true", + help="Enable script debugging output", ) args = parser.parse_args() logging_level = logging.DEBUG if args.debug else logging.INFO diff --git a/examples/gemm_tuner/GemmTunerHelpers.h b/examples/gemm_tuner/GemmTunerHelpers.h new file mode 100644 index 0000000000..dbff9e2dff --- /dev/null +++ b/examples/gemm_tuner/GemmTunerHelpers.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2020-2021 Arm Limited. 
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef EXAMPLES_GEMMTUNERHELPERS_H
+#define EXAMPLES_GEMMTUNERHELPERS_H
+
+#include "arm_compute/core/CL/CLHelpers.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+
+namespace examples
+{
+namespace gemm_tuner_helpers
+{
+bool update_padding_for_cl_image(arm_compute::ITensorInfo *tensor)
+{
+    // Each cl_image pixel holds 4 floats
+    constexpr unsigned int num_floats_per_pixel = 4;
+
+    const unsigned int stride_y_in_elements = tensor->strides_in_bytes()[1] / tensor->element_size();
+    const unsigned int pixel_alignment =
+        arm_compute::get_cl_image_pitch_alignment(arm_compute::CLKernelLibrary::get().get_device());
+    if (pixel_alignment == 0)
+    {
+        return false;
+    }
+    // Round the row stride up to the next multiple of the device's row-pitch alignment
+    const unsigned int row_pitch_alignment = pixel_alignment * num_floats_per_pixel;
+    const unsigned int round_up_width =
+        ((stride_y_in_elements + row_pitch_alignment - 1) / row_pitch_alignment) * row_pitch_alignment;
+    const unsigned int padding = round_up_width - stride_y_in_elements;
+
+    tensor->extend_padding(arm_compute::PaddingSize(0, padding, 0, 0));
+    return true;
+}
+} // namespace gemm_tuner_helpers
+} // namespace examples
+
+#endif /* EXAMPLES_GEMMTUNERHELPERS_H */
diff --git a/examples/gemm_tuner/README.md b/examples/gemm_tuner/README.md
index a4cde10403..aae803eabb 100644
--- a/examples/gemm_tuner/README.md
+++ b/examples/gemm_tuner/README.md
@@ -2,36 +2,101 @@
 ## Introduction

-This is a set of 2 script tools for tuning the performance of OpenCL GEMM kernels (limited to Convolution layer
-functions only for now). Specifically, we tune 3 GEMM kernels, each has a different implementation **strategy** of the
-GEMM operation: **native**, **reshaped**, **reshaped only rhs**. The details of these strategies can be found in the
-documentations of the corresponding kernels: **CLGEMMMatrixMultiplyNativeKernel**,
-**CLGEMMMatrixMultiplyReshapedKernel** and **CLGEMMMatrixMultiplyReshapedOnlyRHSKernel**.
-
-The outputs of the tuning process are 1 optimal configuration (called **GEMM Configuration** or **GEMMConfig**, for
-more details see Approach section) for each of the 3 strategies.
+This is a set of tools for tuning the performance of OpenCL GEMM kernels. Specifically, we tune 3 GEMM kernels, each
+of which implements a different **strategy** of the GEMM operation: **native**, **reshaped**, **reshaped only rhs**.
+The details of these strategies can be found in the documentation of the corresponding kernels:
+**CLGEMMMatrixMultiplyNativeKernel**, **CLGEMMMatrixMultiplyReshapedKernel** and
+**CLGEMMMatrixMultiplyReshapedOnlyRHSKernel**.
+
+The Tuner consists of 2 scripts and 3 binaries:
+* cl_gemm_benchmark and GemmTuner.py under examples/gemm_tuner, and
+* benchmark_cl_gemm_native, benchmark_cl_gemm_reshaped_rhs_only and benchmark_cl_gemm_reshaped under
+  build/tests/gemm_tuner (you'll need to build the library first)
+
+The input to the Tuner is a list of 4-valued tuples we call **GEMM shapes** or **GEMMParams** (M, N, K, B, and possibly
+a data type). They define the "shape" and other parameters (e.g. the data type) of a GEMM operation:
+```
+LHS x RHS = DST
+```
+where LHS is of shape MxK, RHS is of shape KxN, DST is of shape MxN, and B is the batch size.
+
+The outputs of the tuning process are 4 json files:
+1. gemm_type_selection.json: selects which kernel type is best for each GEMMParam
+2. gemm_config_native.json: selects a list of best **GEMMConfigs** of the native kernel for each GEMMParam
+3. gemm_config_reshapedonlyrhs.json: selects a list of best GEMMConfigs of the reshaped_only_rhs kernel for each GEMMParam
+4. gemm_config_reshaped.json: selects a list of best GEMMConfigs of the reshaped kernel for each GEMMParam
+
+These 4 files are the current representation we use for what we call the **heuristics** of a GEMM op: given a GEMMParam,
+which kernel and, subsequently, which configurations for that kernel are the most performant.
+
+## Step-by-step example
+
+### Step1: Prepare the shape and configs files
+1. We first need to identify the shapes that we are interested in and store them in a csv file, say *gemm_shapes.csv*.
+2. Then we need to specify a set of good GEMMConfig candidates for each kernel in 3 separate csv files (this requires
+   some prior heuristics, but these can be provided by the Compute Library developers upon request, based on your target device).
+
+   Say we have *gemm_configs_native.csv*, *gemm_configs_reshaped.csv* and *gemm_configs_reshaped_only_rhs.csv*.
+
+   Please refer to the Prerequisite section for more details.
+
+### Step2: Push relevant files to the target device
+All the files that need to be present on the target device are:
+* benchmark script: \<ComputeLibrary\>/examples/gemm_tuner/cl_gemm_benchmark
+* shapes and configs csv files: gemm_shapes.csv, gemm_configs_native.csv, gemm_configs_reshaped_only_rhs.csv, gemm_configs_reshaped.csv
+* Example benchmark binaries: \<ComputeLibrary\>/build/tests/gemm_tuner/benchmark_cl_gemm*
+
+### Step3: Collect benchmark data
+With these files on device, we can collect benchmark data using the script. Assume all the example binaries are pushed
+to a folder called *gemm_tuner*.
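For reference, both kinds of input file are plain comma-separated rows. A minimal illustration follows (the rows are
example values drawn from the formats described under Prerequisite, not tuned recommendations; the comment lines are
annotation only):
```
# gemm_shapes.csv: one gemm shape <M,N,K,B> per row
1225,32,192,1
# gemm_configs_reshaped_only_rhs.csv: <m0,n0,k0,h0,interleave_rhs,transpose_rhs,export_to_cl_image_rhs>
4,4,4,1,1,1,0
```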
While logged onto our device:
+```
+# Native
+./cl_gemm_benchmark -s native -e ./gemm_tuner -g ./gemm_shapes.csv -c ./gemm_configs_native.csv -o results/native
+# Reshaped Only RHS
+./cl_gemm_benchmark -s reshaped_rhs_only -e ./gemm_tuner -g ./gemm_shapes.csv -c ./gemm_configs_reshaped_only_rhs.csv -o results/reshaped_only_rhs
+# Reshaped
+./cl_gemm_benchmark -s reshaped -e ./gemm_tuner -g ./gemm_shapes.csv -c ./gemm_configs_reshaped.csv -o results/reshaped
+```
+You can repeat the 3 commands above to add a bit of redundancy to your benchmark data (as you can imagine, measurement is noisy),
+but you may need to change the output folder for each repeat.
+
+It is also possible to split the benchmark phase among different platforms, using the **-i** and **-n** options to
+specify the starting experiment and the number of benchmarks to run, as in the following example:
+
+```
+# Reshaped benchmark on 3 different platforms
+## Platform 1
+./cl_gemm_benchmark -s reshaped -e ./gemm_tuner -g ./gemm_shapes.csv -c ./gemm_configs_reshaped.csv -o results/reshaped -i 0 -n 8
+## Platform 2
+./cl_gemm_benchmark -s reshaped -e ./gemm_tuner -g ./gemm_shapes.csv -c ./gemm_configs_reshaped.csv -o results/reshaped -i 8 -n 8
+## Platform 3
+./cl_gemm_benchmark -s reshaped -e ./gemm_tuner -g ./gemm_shapes.csv -c ./gemm_configs_reshaped.csv -o results/reshaped -i 16 -n 8
+```
+
+### Step4: Generate the heuristics
+1. After benchmarking, we pull the benchmark data, the *results* folder, from the target device to our host machine.
+2. We use the GemmTuner.py script to generate the heuristics:
+   ```
+   python3 <ComputeLibrary>/examples/gemm_tuner/GemmTuner.py -b ./results -o heuristics
+   ```
+   When it's finished, there should be 4 json files in the *heuristics* folder.

-## Location
-The 2 scripts **benchmark_gemm_examples.sh** and **GemmTuner.py** can be found under $ACL_ROOT/examples/gemm_tuner.
+One thing to notice is that the config heuristics might give more than 1 recommendation for each GEMMParam, because
+we accept all good GEMMConfigs within a tolerance. If you want fewer recommendations, you can decrease the tolerance by
+passing a lower value to GemmTuner.py via *-t \<tolerance\>*.

-## Pre-requisite
+## Prerequisite
 * A target device to be tuned, plus the following on the device:
   * Android or Linux OS
   * Bash shell
-  * Built ACL with benchmark examples binaries
-  * benchmark_gemm_examples.sh script
+  * Built Compute Library with benchmark examples binaries
+  * cl_gemm_benchmark script
   * gemm shape file

     A csv file containing the **GEMMParam search list**. This is the list of GEMMParams/gemm shapes that we're
-    interested in (For more details see Approach section). The default list is prepared by ACL developers in advance
+    interested in (for more details see the Introduction section). The default list is prepared by Compute Library developers in advance
     and can be provided on request.

     The format is as follows:

-    A headerless csv file with fields separated by commas and commas only (there cannot be whitespaces around each
-    field).
-
-    Note also comments and extraneous empty lines are not permitted.
+    A headerless csv file with fields separated by commas.

     A gemm shape is a list of 4 positive integers \<M, N, K, B\> describing the shapes
     of the two matrices (LHS and RHS) with:
@@ -50,14 +115,14 @@ The 2 scripts **benchmark_gemm_examples.sh** and **GemmTuner.py** can be found u
 * gemm config file

   A csv file containing the **GEMMConfig search list**. This is the list of candidate GEMMConfigs among which we
   search for the optimal one.
**Note that we have a different list for each strategy.** - The default lists are prepared by ACL developers in advance and can be provided on request. + The default lists are prepared by Compute Library developers in advance and can be provided on request. The format of the file for each strategy is the same: - A headerless csv file with fields separated by commas and commas only (there cannot be whitespaces around each - field). Note also comments and extraneous empty lines are not permitted. + A headerless csv file with fields separated by commas. However the fields of GEMMConfig differ for each strategy: + * Strategy **native**: A gemm config is a list of 3 positive integers \<m0, n0, k0\>, with: @@ -78,9 +143,7 @@ The 2 scripts **benchmark_gemm_examples.sh** and **GemmTuner.py** can be found u ... ``` * Strategy **reshaped_rhs_only**: - - A gemm config is a list of 4 positive integers \<m0, n0, k0, h0\> and 2 boolean values interleave_rhs and - transpose_rhs, with: + A gemm config is a list of 4 positive integers <m0, n0, k0, h0> and 3 boolean values: m0 - Number of rows processed by the matrix multiplication n0 - Number of columns processed by the matrix multiplication @@ -88,6 +151,9 @@ The 2 scripts **benchmark_gemm_examples.sh** and **GemmTuner.py** can be found u h0 - Number of horizontal blocks of size (k0xn0) stored on the same output row interleave_rhs - Interleave rhs matrix (1) / Do not interleave rhs matrix (0) transpose_rhs - Transpose rhs matrix (1) / Do not transpose rhs matrix (0) + export_to_cl_image_rhs - Export rhs matrix to cl_image (1) / Do not export rhs matrix to cl_image (0). Can only be true + with certain combinations of the GEMMParams and other configs. Please refer to CLGEMMReshapeRHSMatrixKernel + for more details Only the following configurations of M0, N0 and K0 are currently supported: @@ -98,14 +164,12 @@ The 2 scripts **benchmark_gemm_examples.sh** and **GemmTuner.py** can be found u An example gemm config file looks like: ``` - 4,4,4,1,1,1 - 4,4,4,3,1,0 + 4,4,4,1,1,1,0 + 4,4,4,3,1,0,1 ... ``` * Strategy **reshaped**: - - A gemm config is a list of 5 positive integers \<m0, n0, k0, v0, h0\> and 3 boolean values interleave_lhs, - interleave_rhs and transpose_rhs, with: + A gemm config is a list of 5 positive integers <m0, n0, k0, v0, h0> and 4 boolean values: m0 - Number of rows processed by the matrix multiplication n0 - Number of columns processed by the matrix multiplication @@ -114,29 +178,31 @@ The 2 scripts **benchmark_gemm_examples.sh** and **GemmTuner.py** can be found u h0 - Number of horizontal blocks of size (k0xn0) stored on the same output row interleave_lhs - Interleave lhs matrix (1) / Do not interleave lhs matrix (0) interleave_rhs - Interleave rhs matrix (1) / Do not interleave rhs matrix (0) - transpose_rhs - Transpose rhs matrix but not lhs matrix (1) / Do not transpose rhs matrix but do transpose - lhs matrix (0) + transpose_rhs - Transpose rhs matrix but not lhs matrix (1) / Do not transpose rhs matrix but do transpose lhs matrix (0) + export_to_cl_image_rhs - Export rhs matrix to cl_image (1) / Do not export rhs matrix to cl_image (0). Can only be true + with certain combinations of the GEMMParams and other configs. 
Please refer to CLGEMMReshapeRHSMatrixKernel
+                            for more details.

-   * If rhs matrix is transposed only the following configurations are currently supported:
+   If rhs matrix is transposed only the following configurations are currently supported:

-     M0 = 2, 3, 4, 5, 6, 7, 8
-     N0 = 2, 3, 4, 8, 16
-     K0 = 2, 3, 4, 8, 16
-     V0 >= 1
-     H0 >= 1
+   M0 = 2, 3, 4, 5, 6, 7, 8
+   N0 = 2, 3, 4, 8, 16
+   K0 = 2, 3, 4, 8, 16
+   V0 >= 1
+   H0 >= 1

-   * If lhs matrix is transposed only the following configurations are currently supported:
+   If lhs matrix is transposed only the following configurations are currently supported:

-     M0 = 2, 3, 4, 8
-     N0 = 2, 3, 4, 8, 16
-     K0 = 2, 3, 4, 8, 16
-     V0 >= 1
-     H0 >= 1
+   M0 = 2, 3, 4, 8
+   N0 = 2, 3, 4, 8, 16
+   K0 = 2, 3, 4, 8, 16
+   V0 >= 1
+   H0 >= 1

    An example gemm config file looks like:
    ```
-   4,4,4,1,3,1,1,1
-   4,4,4,3,3,1,1,0
+   4,4,4,1,3,1,1,1,0
+   4,4,4,3,3,1,1,0,1
    ...
    ```
 * A host machine, plus these on the machine:
@@ -144,45 +210,53 @@ The 2 scripts **benchmark_gemm_examples.sh** and **GemmTuner.py** can be found u
   * python >= 3.6
   * GemmTuner.py script

 ## Usage
-The tuning stage consists of 2 steps:
+The usage of the 2 scripts is as follows:

-1. Run benchmarks:
+1. cl_gemm_benchmark

-   Run the shell script (**benchmark_gemm_examples.sh**) on your **target device**. Note that all the built benchmark
-   examples have to be present on your target device prior to running. The benchmark results will be saved to json
-   files in an output directory.
+   Run the shell script (**cl_gemm_benchmark**) on your **target device**. Note that all the built benchmark
+   examples (build/tests/gemm_tuner/benchmark_cl_gemm*) have to be present on your target device prior to running.
+   The benchmark results will be saved to json files in an output directory.
   ```
-   Usage: benchmark_gemm_examples.sh [-h] -s \<strategy\> -e \<example_binary_dir\> -g \<gemm_shape_file\>
-   -c \<gemm_config_file\> [-o \<out_dir\>]
+   Usage: cl_gemm_benchmark [-h] -s \<strategy\> -e \<example_binary_dir\> -g \<gemm_shape_file\>
+   -c \<gemm_config_file\> [-d \<data_type\>] [-o \<out_dir\>]

   Options:
           -h
-           Print help messages. If a strategy is specified with -s \<strategy\>, then only display messages relevant
-           to that strategy. Otherwise if no strategy is specified, display messages for all available strategies.
+           Print help messages. If a strategy is specified with -s <strategy>, then only display messages relevant to that
+           strategy. Otherwise if no strategy is specified, display messages for all available strategies.

-           -s \<strategy\>
+           -s <strategy>
           Strategy option.
-           Options: native reshaped_rhs_only reshaped.
+           Options: native, reshaped_rhs_only, reshaped.

-           -e \<example_binary_dir\>
+           -e <example_binary_dir>
           Path to directory that holds all example binaries

-           -g \<gemm_shape_file\>
+           -g <gemm_shape_file>
           Path to gemm shape csv file

-           -c \<gemm_config_file\>
+           -c <gemm_config_file>
           Path to gemm config csv file

-           -o \<out_dir\>
+           -d <data_type>
+           Data type option with which to run benchmark examples
+           Default: f32
+           Supported options:
+           Strategy            :    Data Types
+           Native              :    f32
+           Reshaped            :    f16, f32, qasymm8
+           Reshaped RHS Only   :    f16, f32, qasymm8
+
+           -o <out_dir>
           Path to output directory that holds output json files
-           Default: out
+           Default: out
   ```
-2. Run analyser:
+2. GemmTuner.py:

   Run the python script (**GemmTuner.py**) on your **host machine**. You'll need to transfer all the benchmark
   result json files generated from the previous step to your host machine
-   beforehand.
The script will output the best configuration, along with some analysis statistics for each strategy, and - optionally save the parsed benchmark results into csv files (one for each strategy) for further analysis. + beforehand. The script will output the best kernel and gemm configurations for each gemm param in the 4 output json files ``` Usage: GemmTuner.py [-h] -b PATH [-o PATH] [-t TOLERANCE] [-D] @@ -194,8 +268,7 @@ The tuning stage consists of 2 steps: result json files have a file extension of 'gemmtuner_benchmark' -o PATH, --output_dir PATH - Path to directory that holds output csv files. One per - strategy + Path to directory that holds output json files. -t TOLERANCE, --tolerance TOLERANCE For testing if two GEMMConfigs are equivalent in terms of performance. The tolerance is OpenCL timer in @@ -203,31 +276,3 @@ The tuning stage consists of 2 steps: -D, --debug Enable script debugging output ``` - -## Approach - -This section gives a brief description and rationale of the approach adopted by the current version of GEMM Tuner. - -As explained in the Introduction section, the outputs of the tuner are 1 optimal GEMMConfig for each strategy. -This is because we can only integrate 1 GEMMConfig for each strategy in ACL at compile time. In theory, however, the -optimal GEMMConfig also depends on different parameters of GEMM (called GEMM Parameter or GEMMParam, e.g.: the shape -of the operation); thus ideally, for each strategy, the optimal configurations should be a mapping from GEMMParam to -GEMMConfig instead of a single GEMMConfig. - -To address this issue, we ensure the one single optimal GEMMConfig can generalise well to all potential GEMMParams -(or at least the ones that we care about). The approach we adopt involves a preliminary stage where a collection of -common GEMMParams (GEMM shapes from popular networks) are compiled. Then, to reduce the final tuning time, rather -contradictorily, we spend a lot of time searching for near-optimal GEMMConfigs for each GEMMParam first, and then -discard redundant GEMMParams which share similar optimal GEMMConfigs with others. The resultant list of GEMMParams is -called a __GEMMParam search list__, as in these GEMMParams are typical enough to capture the space of GEMMParams that -we care about. - -During this preliminary stage we also produce a list of good GEMMConfigs that can be used to search for the optimal one -in the actual tuning stage. This, again, is to reduce the tuning time, and the resultant list is called a -__GEMMConfig search list__. - -The GEMMParam search list and the GEMMConfig search list are investigated and prepared by the developers; the users of -GEMM tuner need not worry about producing them, but they need to obtain them prior to running the tuner. - -Once these two lists (2 for each strategy, so 6 in total) are obtained, they can be fed to the tuner, to produce the -optimal GEMMConfig(s).
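For a sense of how the tuner's 4 output json files might be consumed downstream, here is a minimal sketch. It assumes
the lookup key matches the `str(GEMMParam)` form the script writes, and the helper name is hypothetical; the field
names mirror those written by `save_to_jsons` in GemmTuner.py.
```
import json
import os

def best_choice_for(heuristics_dir, gemm_param_key):
    # gemm_type_selection.json maps str(GEMMParam) -> name of the best strategy
    with open(os.path.join(heuristics_dir, "gemm_type_selection.json")) as f:
        strategy_name = json.load(f)[gemm_param_key]
    # gemm_config_<strategy>.json maps str(GEMMParam) -> list of good configs,
    # each holding "GEMMConfig" plus the two OpenCL timer fields (as strings)
    config_file = "gemm_config_{}.json".format(strategy_name.lower())
    with open(os.path.join(heuristics_dir, config_file)) as f:
        candidates = json.load(f)[gemm_param_key]
    # Pick the candidate with the smallest total measured time
    best = min(
        candidates,
        key=lambda c: float(c["OpenCL_Timer_ms_reshape"]) + float(c["OpenCL_Timer_ms_kernel"]),
    )
    return strategy_name, best["GEMMConfig"]
```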
\ No newline at end of file diff --git a/examples/gemm_tuner/benchmark_gemm_examples.sh b/examples/gemm_tuner/cl_gemm_benchmark.sh index d6f41cc22a..92fe6b194e 100755 --- a/examples/gemm_tuner/benchmark_gemm_examples.sh +++ b/examples/gemm_tuner/cl_gemm_benchmark.sh @@ -1,4 +1,4 @@ -# Copyright (c) 2019 ARM Limited. +# Copyright (c) 2019-2021 Arm Limited. # # SPDX-License-Identifier: MIT # @@ -31,14 +31,34 @@ CMD=$( basename $0 ) # All supported strategy options ALL_STRATEGY_OPTIONS=("native" "reshaped_rhs_only" "reshaped") +# All supported data type options +ALL_DATA_TYPE_OPTIONS=("f32" "f16" "qasymm8") + # Names of example binary for each strategy EXAMPLE_BIN_NATIVE="benchmark_cl_gemm_native" EXAMPLE_BIN_RESHAPED_RHS_ONLY="benchmark_cl_gemm_reshaped_rhs_only" EXAMPLE_BIN_RESHAPED="benchmark_cl_gemm_reshaped" +EXAMPLE_BIN_RESHAPED_RHS_ONLY_LOWP="benchmark_cl_gemmlowp_reshaped_rhs_only_fused_output_stage_fixedpoint" +EXAMPLE_BIN_RESHAPED_LOWP="benchmark_cl_gemmlowp_reshaped" + +# Default data type +DEFAULT_DATA_TYPE="f32" # Default output directory DEFAULT_OUT_DIR="out" +# Default ID of the first experiment +DEFAULT_ID_EXPERIMENT_START=0 + +# Default total number of experiments +DEFAULT_NUM_EXPERIMENTS="all" + +# Default output file extension +DEFAULT_OUT_EXTENSION="mlgo_benchmark" + +# Default OpenCL tuner mode +DEFAULT_TUNER_MODE="rapid" + # Number of iterations for each benchmark run NUM_ITERATION=5 # Global }}} @@ -56,10 +76,7 @@ NUM_ITERATION=5 function help_gemm_shape_file() { cat >&2 << EOF Gemm shape file: - Gemm shape file is a headerless csv file with fields separated by commas and commas only (there cannot be whitespaces - around each field). - - Note also comments and extraneous empty lines are not permitted. + Gemm shape file is a csv file with fields separated by commas. The optional header and comments are ignored by the parser. A gemm shape is a list of 4 positive integers <M, N, K, B> describing the shapes of the two matrices (LHS and RHS) with: @@ -88,10 +105,7 @@ EOF function help_gemm_config_file_native() { cat >&2 << EOF Gemm config file (Strategy native): - Gemm config file is a headerless csv file with fields separated by commas and commas only (there cannot be whitespaces - around each field). - - Note also comments and extraneous empty lines are not permitted. + Gemm config file is a csv file with fields separated by commas. The optional header and comments are ignored by the parser. A gemm config is a list of 3 positive integers <m0, n0, k0>, with: m0 - Number of rows processed by the matrix multiplication @@ -123,18 +137,18 @@ EOF function help_gemm_config_file_reshaped_rhs_only() { cat >&2 << EOF Gemm config file (Strategy reshaped_rhs_only): - Gemm config file is a headerless csv file with fields separated by commas and commas only (there cannot be whitespaces - around each field). - - Note also comments and extraneous empty lines are not permitted. + Gemm config file is a csv file with fields separated by commas. The optional header and comments are ignored by the parser. 
-  A gemm config is a list of 4 positive integers <m0, n0, k0, h0> and 2 boolean values interleave_rhs and transpose_rhs, with:
+  A gemm config is a list of 4 positive integers <m0, n0, k0, h0> and 3 boolean values:

   m0 - Number of rows processed by the matrix multiplication
   n0 - Number of columns processed by the matrix multiplication
   k0 - Number of partial accumulations performed by the matrix multiplication
   h0 - Number of horizontal blocks of size (k0xn0) stored on the same output row
   interleave_rhs - Interleave rhs matrix (1) / Do not interleave rhs matrix (0)
   transpose_rhs - Transpose rhs matrix (1) / Do not transpose rhs matrix (0)
+  export_to_cl_image_rhs - (Not supported for quantized types) Export rhs matrix to cl_image (1) / Do not export rhs matrix to cl_image (0). Can only be true
+                           with certain combinations of the GEMMParams and other configs. Please refer to CLGEMMReshapeRHSMatrixKernel
+                           for more details

   Only the following configurations of M0, N0 and K0 are currently supported:
   M0 = 1, 2, 3, 4, 5, 6, 7, 8
@@ -143,8 +157,8 @@ Gemm config file (Strategy reshaped_rhs_only):
   H0 >= 1

   An example gemm config file looks like:
-  4,4,4,1,1,1
-  4,4,4,3,1,0
+  4,4,4,1,1,1,0
+  4,4,4,3,1,0,1
   ...

EOF
@@ -162,12 +176,9 @@ EOF
 function help_gemm_config_file_reshaped() {
 cat >&2 << EOF
 Gemm config file (Strategy reshaped):
-  Gemm config file is a headerless csv file with fields separated by commas and commas only (there cannot be whitespaces
-  around each field).
+  Gemm config file is a csv file with fields separated by commas. The optional header and comments are ignored by the parser.

-  Note also comments and extraneous empty lines are not permitted.
-
-  A gemm config is a list of 5 positive integers <m0, n0, k0, v0, h0> and 3 boolean values interleave_lhs, interleave_rhs and transpose_rhs, with:
+  A gemm config is a list of 5 positive integers <m0, n0, k0, v0, h0> and 4 boolean values:
   m0 - Number of rows processed by the matrix multiplication
   n0 - Number of columns processed by the matrix multiplication
   k0 - Number of partial accumulations performed by the matrix multiplication
@@ -176,6 +187,9 @@ Gemm config file (Strategy reshaped):
   interleave_lhs - Interleave lhs matrix (1) / Do not interleave lhs matrix (0)
   interleave_rhs - Interleave rhs matrix (1) / Do not interleave rhs matrix (0)
   transpose_rhs - Transpose rhs matrix but not lhs matrix (1) / Do not transpose rhs matrix but do transpose lhs matrix (0)
+  export_to_cl_image_rhs - (Not supported for quantized types) Export rhs matrix to cl_image (1) / Do not export rhs matrix to cl_image (0). Can only be true
+                           with certain combinations of the GEMMParams and other configs. Please refer to CLGEMMReshapeRHSMatrixKernel
+                           for more details

   If rhs matrix is transposed only the following configurations are currently supported:
   M0 = 2, 3, 4, 5, 6, 7, 8
@@ -192,8 +206,8 @@ Gemm config file (Strategy reshaped):
   H0 >= 1

   An example gemm config file looks like:
-  4,4,4,1,3,1,1,1
-  4,4,4,3,3,1,1,0
+  4,4,4,1,3,1,1,1,0
+  4,4,4,3,3,1,1,0,1
   ...

EOF
@@ -213,7 +227,7 @@ function usage() {
 cat >&2 << EOF
 Run gemm examples of a selected strategy, over provided tunable configurations and gemm shapes. Save the benchmark results to json files in an output directory.
-Usage: ${CMD} [-h] -s <strategy> -e <example_binary_dir> -g <gemm_shape_file> -c <gemm_config_file> [-o <out_dir>] +Usage: ${CMD} [-h] -s <strategy> -e <example_binary_dir> -g <gemm_shape_file> -c <gemm_config_file> [-o <out_dir>] [-d <data_type>] [-i <id_experiment_start>] [-n <num_experiments>] [-t <output_extension>] Options: -h @@ -233,10 +247,35 @@ Options: -c <gemm_config_file> Path to gemm config csv file + -d <data_type> + Data type option with which to run benchmark examples + Default: ${DEFAULT_DATA_TYPE} + Supported options: + Strategy : Data Types + Native : f32 + Reshaped : f32, f16, qasymm8 + Reshaped RHS Only : f32, f16, qasymm8 + -o <out_dir> Path to output directory that holds output json files Default: ${DEFAULT_OUT_DIR} + -i <id_experiment_start> + ID of the first experiment. + Default: ${DEFAULT_ID_EXPERIMENT_START} + + -n <num_experiments> + Total number of experiments to execute in this session. [1-all] + Default: ${DEFAULT_NUM_EXPERIMENTS} + + -t <output_extension> + Output file extension. + Default: ${DEFAULT_OUT_EXTENSION} + + -m <tuner_mode> + OpenCL tuner mode. + Default: ${DEFAULT_TUNER_MODE} + EOF # Print help messages about gemm shapes and various gemm configs $HELP && help_gemm_shape_file @@ -315,10 +354,17 @@ function arr_contains() { # Globals: # OUT_DIR # OUT_EXTENSION +# TUNER_MODE # EXAMPLE_BIN_DIR # NUM_ITERATION # GEMM_CONFIGS_FILE # GEMM_SHAPES_FILE +# STRATEGY_OPTION +# DATA_TYPE +# OUT_DIR +# ID_EXPERIMENT_START +# NUM_EXPERIMENTS + # Arguments: # example_bin Name of the example binary to run # Returns: @@ -326,41 +372,97 @@ function arr_contains() { ####################################### function run() { local example_bin=$1 - echo "Running all configs for ${example_bin}" 1>&2 + echo "Running experiments for ${example_bin}" 1>&2 local example_args - local expr_count=1 + local json_filename + local expr_count=0 + # Total number of experiments available + local num_experiments_total # Total number of experiment runs scheduled for this session - local total_num_experiment - local num_params - local num_configs - num_params=$( wc -l ${GEMM_SHAPES_FILE} | cut -d " " -f 1) - num_configs=$( wc -l ${GEMM_CONFIGS_FILE} | cut -d " " -f 1 ) - (( total_num_experiment=${num_params} * ${num_configs} )) + local num_experiments_session + local id_experiment_start + local id_experiment_end + local array_shapes + local array_configs + local array_shapes_len + local array_configs_len + local array_shapes_idx + local array_configs_idx + local match_expression_shape="^([^,]*,){3}[^,]*$" + local match_expression_config="^(\s*[0-9]+\s*,)+\s*[0-9]\s*$" + local shapes_list_cmd="grep -E "$match_expression_shape" "${GEMM_SHAPES_FILE}"" + local configs_list_cmd="grep -E "$match_expression_config" "${GEMM_CONFIGS_FILE}"" + + # Create array from CSV file + array_shapes=($( $shapes_list_cmd )) + array_configs=($( $configs_list_cmd )) + + # Get array length + array_shapes_len=${#array_shapes[@]} + array_configs_len=${#array_configs[@]} + + # Get the total number of experiments available + (( num_experiments_total=${array_shapes_len} * ${array_configs_len} )) + + # Get the number of experiments to execute in this session + if [ ${NUM_EXPERIMENTS} == ${DEFAULT_NUM_EXPERIMENTS} ] + then + (( num_experiments_session=${array_shapes_len} * ${array_configs_len} )) + else + num_experiments_session=$NUM_EXPERIMENTS + fi + + # Id experiment start + id_experiment_start=${ID_EXPERIMENT_START} + + # Id experiment end + (( id_experiment_end=(${num_experiments_session} + 
${id_experiment_start} - 1) ))
+
+    # Check if the id experiment end is greater than or equal to the total number of experiments available.
+    # If the condition is satisfied, clamp the id experiment end
+    if [ "$id_experiment_end" -ge "$num_experiments_total" ]
+    then
+        echo "Clamping idx experiment end" 1>&2
+        (( id_experiment_end=${num_experiments_total} - 1 ))
+        # Recompute the session size after clamping: experiments [start, end], inclusive
+        (( num_experiments_session=${id_experiment_end} - ${id_experiment_start} + 1 ))
+    fi
+
     # Time elapsed since the beginning in seconds
     local time_elapsed_s
     # Time estimated to finish in seconds
     local time_est_s
-    echo "Running a total number of ${total_num_experiment} experiments" 1>&2
+    echo "Running a total number of ${num_experiments_session} experiments" 1>&2
+    echo "Experiment idx start/end [${id_experiment_start}, ${id_experiment_end}]" 1>&2

-    while read gemm_shape
+    # Run experiments
+    for i in $(seq $id_experiment_start $id_experiment_end);
     do
-        while read gemm_config
-        do
-            echo "Running..." 1>&2
-            example_args="${gemm_shape},${gemm_config}"
-            # Run experiment
-            ${EXAMPLE_BIN_DIR}/${example_bin} --example_args=${example_args} --iterations=${NUM_ITERATION} --json-file=${OUT_DIR}/${expr_count}.${OUT_EXTENSION} --instruments=OPENCL_TIMER_MS
-            # Print progress
-            print_progress ${expr_count} ${total_num_experiment}
-            # Print time statistics
-            time_elapsed_s=$SECONDS
-            echo "Time elapsed since beginning: $(( $time_elapsed_s / 60 ))m $(( $time_elapsed_s % 60 ))s" 1>&2
-            (( time_est_s=(${total_num_experiment} - ${expr_count}) * ${time_elapsed_s} / ${expr_count} ))
-            echo "Time estimated to finish: $(( $time_est_s / 60 ))m $(( $time_est_s % 60 ))s" 1>&2
-            (( expr_count++ ))
-            echo "Done." 1>&2
-        done < "${GEMM_CONFIGS_FILE}"
-    done < "${GEMM_SHAPES_FILE}"
+        # Map the flat experiment id onto a (shape, config) pair, row-major over configs
+        (( array_shapes_idx=${i} / ${array_configs_len} ))
+        (( array_configs_idx=${i} % ${array_configs_len} ))
+
+        gemm_shape=${array_shapes[$array_shapes_idx]}
+        gemm_config=${array_configs[$array_configs_idx]}
+
+        echo "Running shape[$array_shapes_idx]=$gemm_shape with config[$array_configs_idx]=$gemm_config" 1>&2
+
+        example_args="${gemm_shape},${gemm_config},--type=${DATA_TYPE},--tuner-mode=${TUNER_MODE}"
+        json_filename="${STRATEGY_OPTION}_${gemm_shape}_${gemm_config}_${DATA_TYPE}"
+        # Replace "," with "_"
+        json_filename=${json_filename//,/_}
+
+        # Run experiment
+        ${EXAMPLE_BIN_DIR}/${example_bin} --example_args=${example_args} --iterations=${NUM_ITERATION} --json-file=${OUT_DIR}/${json_filename}.${OUT_EXTENSION} --instruments=OPENCL_TIMER_MS
+        # Print progress
+        (( expr_count++ ))
+        print_progress ${expr_count} ${num_experiments_session}
+        # Print time statistics
+        time_elapsed_s=$SECONDS
+        echo "Time elapsed since beginning: $(( $time_elapsed_s / 60 ))m $(( $time_elapsed_s % 60 ))s" 1>&2
+        (( time_est_s=(${num_experiments_session} - ${expr_count}) * ${time_elapsed_s} / ${expr_count} ))
+        echo "Time estimated to finish: $(( $time_est_s / 60 ))m $(( $time_est_s % 60 ))s" 1>&2
+        echo "Done."
1>&2 + done + echo "Finished running all configs for ${example_bin}" 1>&2 echo "All results saved to ${OUT_DIR}" 1>&2 } @@ -404,23 +506,37 @@ EXAMPLE_BIN_DIR="" GEMM_SHAPES_FILE="" # Path to gemm configs file GEMM_CONFIGS_FILE="" +# Strategy option STRATEGY_OPTION="" +# Data type to use +DATA_TYPE=${DEFAULT_DATA_TYPE} # Path to output directory OUT_DIR=${DEFAULT_OUT_DIR} +# ID of the first experiment +ID_EXPERIMENT_START=${DEFAULT_ID_EXPERIMENT_START} +# Total number of experiments to execute in this session +NUM_EXPERIMENTS=${DEFAULT_NUM_EXPERIMENTS} # Output benchmark result file extension -OUT_EXTENSION="gemmtuner_benchmark" +OUT_EXTENSION=${DEFAULT_OUT_EXTENSION} +# OpenCL tuner mode +TUNER_MODE=${DEFAULT_TUNER_MODE} # Toggle help HELP=false # Obtain options -while getopts "hs:e:g:c:o:" opt; do +while getopts "hs:e:g:c:d:o:i:n:t:m:" opt; do case "$opt" in h) HELP=true ;; s) STRATEGY_OPTION=$(to_lower "${OPTARG}");; e) EXAMPLE_BIN_DIR="${OPTARG}";; g) GEMM_SHAPES_FILE="${OPTARG}";; c) GEMM_CONFIGS_FILE="${OPTARG}";; + d) DATA_TYPE=$(to_lower "${OPTARG}");; o) OUT_DIR="${OPTARG}";; + i) ID_EXPERIMENT_START="${OPTARG}";; + n) NUM_EXPERIMENTS="${OPTARG}";; + t) OUT_EXTENSION="${OPTARG}";; + m) TUNER_MODE="${OPTARG}";; esac done shift $((OPTIND - 1)) @@ -454,17 +570,27 @@ $HELP && arr_contains "${STRATEGY_OPTION}" "${ALL_STRATEGY_OPTIONS[@]}" || error_msg "Does not support strategy ${STRATEGY_OPTION}" +# Verify data type option is valid +arr_contains "${DATA_TYPE}" "${ALL_DATA_TYPE_OPTIONS[@]}" || + error_msg "Does not support data type ${DATA_TYPE}" + # Make sure existing benchmark outputs are not overwritten [ ! -d "${OUT_DIR}" ] || error_msg "Output directory ${OUT_DIR} already exists!" # Make output directory -mkdir ${OUT_DIR} +echo "Making output directory ${OUT_DIR}" 1>&2 +mkdir -p ${OUT_DIR} || error_msg "Failed to make output directory ${OUT_DIR}" # Run selected strategy with all configurations # Restart the built-in timer SECONDS=0 -[ "${STRATEGY_OPTION}" == "native" ] && run $EXAMPLE_BIN_NATIVE -[ "${STRATEGY_OPTION}" == "reshaped_rhs_only" ] && run $EXAMPLE_BIN_RESHAPED_RHS_ONLY -[ "${STRATEGY_OPTION}" == "reshaped" ] && run $EXAMPLE_BIN_RESHAPED +if [ "$DATA_TYPE" == "qasymm8" ]; then + [ "${STRATEGY_OPTION}" == "reshaped_rhs_only" ] && run $EXAMPLE_BIN_RESHAPED_RHS_ONLY_LOWP + [ "${STRATEGY_OPTION}" == "reshaped" ] && run $EXAMPLE_BIN_RESHAPED_LOWP +else + [ "${STRATEGY_OPTION}" == "native" ] && run $EXAMPLE_BIN_NATIVE + [ "${STRATEGY_OPTION}" == "reshaped_rhs_only" ] && run $EXAMPLE_BIN_RESHAPED_RHS_ONLY + [ "${STRATEGY_OPTION}" == "reshaped" ] && run $EXAMPLE_BIN_RESHAPED +fi # Main: Main script }}} diff --git a/examples/gemm_tuner/cl_gemm_native.cpp b/examples/gemm_tuner/cl_gemm_native.cpp index 0cacd82087..7daa0b07d3 100644 --- a/examples/gemm_tuner/cl_gemm_native.cpp +++ b/examples/gemm_tuner/cl_gemm_native.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2021 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -25,23 +25,24 @@ #error "This example needs to be built with -DARM_COMPUTE_CL" #endif /* ARM_COMPUTE_CL */ -#include "CommonGemmExampleOptions.h" -#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyNativeKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/CL/CLFunctions.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/CL/CLTuner.h" + +#include "src/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.h" #include "tests/CL/Helper.h" -#include "utils/Utils.h" #include "utils/command_line/CommandLineOptions.h" #include "utils/command_line/CommandLineParser.h" +#include "utils/Utils.h" +#include "CommonGemmExampleOptions.h" #include <cstdlib> using namespace arm_compute; +using namespace arm_compute::opencl::kernels; using namespace utils; using namespace arm_compute::misc::shape_calculator; using namespace gemm_tuner; @@ -51,9 +52,9 @@ namespace /** Structure holding all tunable gemm configs specific to this example/strategy */ struct GemmConfigs { - size_t m0{ 4 }; /**< Number of rows processed by the matrix multiplication */ - size_t n0{ 4 }; /**< Number of columns processed by the matrix multiplication */ - size_t k0{ 4 }; /**< Number of partial accumulations performed by the matrix multiplication */ + size_t m0{4}; /**< Number of rows processed by the matrix multiplication */ + size_t n0{4}; /**< Number of columns processed by the matrix multiplication */ + size_t k0{4}; /**< Number of partial accumulations performed by the matrix multiplication */ }; /** Formatted output of the GemmConfigs type @@ -123,8 +124,8 @@ GemmConfigs consume_gemm_configs(const GemmConfigOptions &options) } } // namespace -// Create function for CLGEMMMatrixMultiplyNativeKernel -using CLGEMMMatrixMultiplyNative = test::CLSynthetizeFunction<CLGEMMMatrixMultiplyNativeKernel>; +// Create function for ClGemmMatrixMultiplyNativeKernel +using CLGEMMMatrixMultiplyNative = test::CLSynthetizeOperator<ClGemmMatrixMultiplyNativeKernel>; class CLGEMMMatrixMultiplyNativeExample : public Example { @@ -132,10 +133,9 @@ public: bool do_setup(int argc, char **argv) override { // Default parameters - const DataType data_type = DataType::F32; - const float alpha = 1.0f; - const float beta = 0.0f; - const ActivationLayerInfo act_info = ActivationLayerInfo(); + const float alpha = 1.0f; + const float beta = 0.0f; + const ActivationLayerInfo act_info = ActivationLayerInfo(); CommonGemmExampleParams params; GemmConfigs configs; @@ -146,13 +146,13 @@ public: // Parse command line options parser.parse(argc, argv); - if(param_options.help->is_set() && param_options.help->value()) + if (param_options.help->is_set() && param_options.help->value()) { // Print help message parser.print_help(argv[0]); return false; } - if(!parser.validate()) + if (!parser.validate()) { // Invalid arguments. Use default parameters and configs std::cerr << "Invalid arguments." 
<< std::endl; @@ -167,16 +167,18 @@ public: } // Print gemm parameters and configurations - std::cerr << "Gemm parameters:" << std::endl; - std::cerr << params << std::endl; - std::cerr << "Gemm configurations:" << std::endl; - std::cerr << configs << std::endl; + std::cout << "Gemm parameters:" << std::endl; + std::cout << params << std::endl; + std::cout << "Gemm configurations:" << std::endl; + std::cout << configs << std::endl; + + tuner.set_tuner_mode(params.tuner_mode); CLScheduler::get().default_init(&tuner); - lhs.allocator()->init(TensorInfo(TensorShape(params.K, params.M, params.B), 1, data_type)); - rhs.allocator()->init(TensorInfo(TensorShape(params.N, params.K, params.B), 1, data_type)); - bias.allocator()->init(TensorInfo(TensorShape(params.N, 1, params.B), 1, data_type)); + lhs.allocator()->init(TensorInfo(TensorShape(params.K, params.M, params.B), 1, params.data_type)); + rhs.allocator()->init(TensorInfo(TensorShape(params.N, params.K, params.B), 1, params.data_type)); + bias.allocator()->init(TensorInfo(TensorShape(params.N, 1, params.B), 1, params.data_type)); GEMMLHSMatrixInfo lhs_info; lhs_info.m0 = configs.m0; @@ -195,8 +197,20 @@ public: kernel_info.broadcast_bias = true; kernel_info.activation_info = act_info; + // Validate argments + Status status{}; + status = gemm.validate(lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, + kernel_info); + if (!status) + { + // Unsupported arguments + std::cerr << "Unsupported arguments." << std::endl; + std::cerr << "Check documentation for supported/unsupported combinations" << std::endl; + return false; + } + // Configure function - gemm.configure(&lhs, &rhs, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info); + gemm.configure(lhs.info(), rhs.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, kernel_info); // Allocate tensors lhs.allocator()->allocate(); @@ -209,7 +223,8 @@ public: void do_run() override { // Execute the function - gemm.run(); + ITensorPack gemm_pack({{ACL_SRC_0, &lhs}, {ACL_SRC_1, &rhs}, {ACL_SRC_2, &bias}, {ACL_DST, &dst}}); + gemm.run(gemm_pack); // Make sure all the OpenCL jobs are done executing: CLScheduler::get().sync(); diff --git a/examples/gemm_tuner/cl_gemm_reshaped.cpp b/examples/gemm_tuner/cl_gemm_reshaped.cpp index e579ed762c..75f3539cb9 100644 --- a/examples/gemm_tuner/cl_gemm_reshaped.cpp +++ b/examples/gemm_tuner/cl_gemm_reshaped.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2021, 2023 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -25,24 +25,26 @@ #error "This example needs to be built with -DARM_COMPUTE_CL" #endif /* ARM_COMPUTE_CL */ -#include "CommonGemmExampleOptions.h" -#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedKernel.h" -#include "arm_compute/core/CL/kernels/CLGEMMReshapeLHSMatrixKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/CL/CLFunctions.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/CL/CLTuner.h" + +#include "examples/gemm_tuner/CommonGemmExampleOptions.h" +#include "examples/gemm_tuner/GemmTunerHelpers.h" +#include "src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.h" +#include "src/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h" #include "tests/CL/Helper.h" -#include "utils/Utils.h" #include "utils/command_line/CommandLineOptions.h" #include "utils/command_line/CommandLineParser.h" +#include "utils/Utils.h" #include <cstdlib> using namespace arm_compute; +using namespace arm_compute::opencl::kernels; using namespace utils; using namespace arm_compute::misc::shape_calculator; using namespace gemm_tuner; @@ -52,15 +54,16 @@ namespace /** Structure holding all tunable gemm configs specific to this example/strategy */ struct GemmConfigs { - size_t m0{ 4 }; /**< Number of rows processed by the matrix multiplication */ - size_t n0{ 4 }; /**< Number of columns processed by the matrix multiplication */ - size_t k0{ 4 }; /**< Number of partial accumulations performed by the matrix multiplication */ - size_t v0{ 1 }; /**< Number of vertical blocks of size (m0xk0) stored on the same output row */ - size_t h0{ 1 }; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */ - bool interleave_lhs{ true }; /**< Interleave lhs matrix */ - bool transpose_lhs{ true }; /**< Transpose lhs matrix. */ - bool interleave_rhs{ true }; /**< Interleave rhs matrix */ - bool transpose_rhs{ true }; /**< Transpose rhs matrix. */ + size_t m0{4}; /**< Number of rows processed by the matrix multiplication */ + size_t n0{4}; /**< Number of columns processed by the matrix multiplication */ + size_t k0{4}; /**< Number of partial accumulations performed by the matrix multiplication */ + size_t v0{1}; /**< Number of vertical blocks of size (m0xk0) stored on the same output row */ + size_t h0{1}; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */ + bool interleave_lhs{true}; /**< Interleave lhs matrix */ + bool transpose_lhs{true}; /**< Transpose lhs matrix. */ + bool interleave_rhs{true}; /**< Interleave rhs matrix */ + bool transpose_rhs{true}; /**< Transpose rhs matrix. */ + bool export_to_cl_image_rhs{true}; /**< Export rhs matrix to cl_image. */ }; /** Formatted output of the GemmConfigs type @@ -84,6 +87,7 @@ struct GemmConfigs os << "transpose_lhs : " << (configs.transpose_lhs ? true_str : false_str) << std::endl; os << "interleave_rhs : " << (configs.interleave_rhs ? true_str : false_str) << std::endl; os << "transpose_rhs : " << (configs.transpose_rhs ? true_str : false_str) << std::endl; + os << "export_to_cl_image_rhs : " << (configs.export_to_cl_image_rhs ? 
true_str : false_str) << std::endl; return os; } @@ -103,7 +107,8 @@ public: h0(parser.add_positional_option<SimpleOption<size_t>>("h0", 1)), interleave_lhs(parser.add_positional_option<SimpleOption<size_t>>("interleave_lhs", 1)), interleave_rhs(parser.add_positional_option<SimpleOption<size_t>>("interleave_rhs", 1)), - transpose_rhs(parser.add_positional_option<SimpleOption<size_t>>("transpose_rhs", 1)) + transpose_rhs(parser.add_positional_option<SimpleOption<size_t>>("transpose_rhs", 1)), + export_to_cl_image_rhs(parser.add_positional_option<SimpleOption<size_t>>("export_to_cl_image_rhs", 1)) { m0->set_help("Number of rows processed by the matrix multiplication"); n0->set_help("Number of columns processed by the matrix multiplication"); @@ -115,7 +120,10 @@ public: // FIXME: Currently we only support 2 variants of the gemm reshaped kernels in which transpose_lhs and // transpose_rhs are the opposites of each other. In the future we may extend the kernels to include the other // 2 variants (both transposed and none transposed) - transpose_rhs->set_help("Transpose rhs matrix but not lhs matrix (1) / Do not transpose rhs matrix but do transpose lhs matrix (0)"); + transpose_rhs->set_help("Transpose rhs matrix but not lhs matrix (1) / Do not transpose rhs matrix but do " + "transpose lhs matrix (0)"); + export_to_cl_image_rhs->set_help( + "Export rhs matrix to cl_image (1) / Do not export rhs matrix to cl_image (0)"); } /** Prevent instances of this class from being copied (As this class contains pointers) */ GemmConfigOptions(const GemmConfigOptions &) = delete; @@ -128,17 +136,19 @@ public: /** Default destructor */ ~GemmConfigOptions() = default; - SimpleOption<size_t> *m0; /**< Number of rows processed by the matrix multiplication option */ - SimpleOption<size_t> *n0; /**< Number of columns processed by the matrix multiplication option */ - SimpleOption<size_t> *k0; /**< Number of partial accumulations performed by the matrix multiplication option */ - SimpleOption<size_t> *v0; /**< Number of vertical blocks of size (m0xk0) stored on the same output row option */ - SimpleOption<size_t> *h0; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row option */ + SimpleOption<size_t> *m0; /**< Number of rows processed by the matrix multiplication option */ + SimpleOption<size_t> *n0; /**< Number of columns processed by the matrix multiplication option */ + SimpleOption<size_t> *k0; /**< Number of partial accumulations performed by the matrix multiplication option */ + SimpleOption<size_t> *v0; /**< Number of vertical blocks of size (m0xk0) stored on the same output row option */ + SimpleOption<size_t> *h0; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row option */ SimpleOption<size_t> *interleave_lhs; /**< Interleave lhs matrix option (1 enable; 0 disable) */ SimpleOption<size_t> *interleave_rhs; /**< Interleave rhs matrix option (1 enable; 0 disable) */ // FIXME: Currently we only support 2 variants of the gemm reshaped kernels in which transpose_lhs and // transpose_rhs are the opposites of each other. In the future we may extend the kernels to include the other // 2 variants (both transposed and none transposed) - SimpleOption<size_t> *transpose_rhs; /**< Transpose rhs matrix option (1 enable; 0 disable). Also set the lhs matrix transpose option to the opposite. */ + SimpleOption<size_t> * + transpose_rhs; /**< Transpose rhs matrix option (1 enable; 0 disable). Also set the lhs matrix transpose option to the opposite. 
*/ + SimpleOption<size_t> *export_to_cl_image_rhs; /**< Export rhs matrix to cl_image.*/ }; /** Consumes the gemm configuration options and creates a structure containing all information @@ -159,17 +169,19 @@ GemmConfigs consume_gemm_configs(const GemmConfigOptions &options) // FIXME: Currently we only support 2 variants of the gemm reshaped kernels in which transpose_lhs and // transpose_rhs are the opposites of each other. In the future we may extend the kernels to include the other // 2 variants (both transposed and none transposed) - configs.transpose_lhs = options.transpose_rhs->value() == 0; - configs.interleave_rhs = options.interleave_rhs->value() != 0; - configs.transpose_rhs = options.transpose_rhs->value() != 0; + configs.transpose_lhs = options.transpose_rhs->value() == 0; + configs.interleave_rhs = options.interleave_rhs->value() != 0; + configs.transpose_rhs = options.transpose_rhs->value() != 0; + configs.export_to_cl_image_rhs = options.export_to_cl_image_rhs->value() != 0; return configs; } } // namespace -// Create function for CLGEMMReshapeLHSMatrixKernel -using CLGEMMReshapeLHSMatrix = test::CLSynthetizeFunction<CLGEMMReshapeLHSMatrixKernel>; -// Create function for CLGEMMMatrixMultiplyReshapedKernel -using CLGEMMMatrixMultiplyReshaped = test::CLSynthetizeFunction<CLGEMMMatrixMultiplyReshapedKernel>; + +// Create function for ClGemmReshapeLhsMatrixKernel +using CLGEMMReshapeLHSMatrix = test::CLSynthetizeOperator<ClGemmReshapeLhsMatrixKernel>; +// Create function for ClGemmMatrixMultiplyReshapedKernel +using CLGEMMMatrixMultiplyReshaped = test::CLSynthetizeOperator<ClGemmMatrixMultiplyReshapedKernel>; class CLGEMMMatrixMultiplyReshapedExample : public Example { @@ -177,10 +189,9 @@ public: bool do_setup(int argc, char **argv) override { // Default parameters - const DataType data_type = DataType::F32; - const float alpha = 1.0f; - const float beta = 0.0f; - const ActivationLayerInfo act_info = ActivationLayerInfo(); + const float alpha = 1.0f; + const float beta = 0.0f; + const ActivationLayerInfo act_info = ActivationLayerInfo(); CommonGemmExampleParams params; GemmConfigs configs; @@ -191,13 +202,13 @@ public: // Parse command line options parser.parse(argc, argv); - if(param_options.help->is_set() && param_options.help->value()) + if (param_options.help->is_set() && param_options.help->value()) { // Print help message parser.print_help(argv[0]); return false; } - if(!parser.validate()) + if (!parser.validate()) { // Invalid arguments. Use default parameters and configs std::cerr << "Invalid arguments." 
<< std::endl; @@ -212,16 +223,18 @@ public: } // Print gemm parameters and configurations - std::cerr << "Gemm parameters:" << std::endl; - std::cerr << params << std::endl; - std::cerr << "Gemm configurations:" << std::endl; - std::cerr << configs << std::endl; + std::cout << "Gemm parameters:" << std::endl; + std::cout << params << std::endl; + std::cout << "Gemm configurations:" << std::endl; + std::cout << configs << std::endl; + + tuner.set_tuner_mode(params.tuner_mode); CLScheduler::get().default_init(&tuner); - lhs.allocator()->init(TensorInfo(TensorShape(params.K, params.M, params.B), 1, data_type)); - rhs.allocator()->init(TensorInfo(TensorShape(params.N, params.K, params.B), 1, data_type)); - bias.allocator()->init(TensorInfo(TensorShape(params.N, 1, params.B), 1, data_type)); + lhs.allocator()->init(TensorInfo(TensorShape(params.K, params.M, params.B), 1, params.data_type)); + rhs.allocator()->init(TensorInfo(TensorShape(params.N, params.K, params.B), 1, params.data_type)); + bias.allocator()->init(TensorInfo(TensorShape(params.N, 1, params.B), 1, params.data_type)); GEMMLHSMatrixInfo lhs_info; lhs_info.m0 = configs.m0; @@ -231,11 +244,12 @@ public: lhs_info.transpose = configs.transpose_lhs; GEMMRHSMatrixInfo rhs_info; - rhs_info.n0 = configs.n0; - rhs_info.k0 = configs.k0; - rhs_info.h0 = configs.h0; - rhs_info.interleave = configs.interleave_rhs; - rhs_info.transpose = configs.transpose_rhs; + rhs_info.n0 = configs.n0; + rhs_info.k0 = configs.k0; + rhs_info.h0 = configs.h0; + rhs_info.interleave = configs.interleave_rhs; + rhs_info.transpose = configs.transpose_rhs; + rhs_info.export_to_cl_image = configs.export_to_cl_image_rhs; GEMMKernelInfo kernel_info; kernel_info.m = params.M; @@ -246,17 +260,55 @@ public: kernel_info.broadcast_bias = true; kernel_info.activation_info = act_info; + if (rhs_info.h0 == 0) + { + rhs_info.h0 = std::max(kernel_info.n / rhs_info.n0, 1U); + } + // Initialise lhs_reshaped tensor info - auto_init_if_empty(*lhs_reshaped.info(), lhs.info()->clone()->set_tensor_shape(compute_lhs_reshaped_shape(*lhs.info(), lhs_info))); + lhs_reshaped.allocator()->init( + TensorInfo(compute_lhs_reshaped_shape(*lhs.info(), lhs_info), 1, params.data_type)); // Initialise rhs_reshaped tensor info - auto_init_if_empty(*rhs_reshaped.info(), rhs.info()->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*rhs.info(), rhs_info))); + rhs_reshaped.allocator()->init( + TensorInfo(compute_rhs_reshaped_shape(*rhs.info(), rhs_info), 1, params.data_type)); + + if (rhs_info.export_to_cl_image) + { + if (!examples::gemm_tuner_helpers::update_padding_for_cl_image(rhs_reshaped.info())) + { + std::cerr << "cl_image is not supported on the device, disable export_to_cl_image" << std::endl; + return false; + } + } + + // Validate arguments + Status status{}; + status = reshape_lhs.validate(lhs.info(), lhs_reshaped.info(), lhs_info, kernel_info.reinterpret_input_as_3d); + if (!status) + { + // Unsupported arguments + std::cerr << "Unsupported arguments." << std::endl; + std::cerr << "Check documentation for supported/unsupported combinations" << std::endl; + return false; + } + + status = gemm.validate(lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, + rhs_info, kernel_info); + if (!status) + { + // Unsupported arguments + std::cerr << "Unsupported arguments."
<< std::endl; + std::cerr << "Check documentation for supported/unsupported combinations" << std::endl; + return false; + } // Configure reshape lhs function - reshape_lhs.configure(&lhs, &lhs_reshaped, lhs_info); + reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info); // Configure function - gemm.configure(&lhs_reshaped, &rhs_reshaped, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info); + gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, + rhs_info, kernel_info); // Allocate tensors lhs.allocator()->allocate(); @@ -270,9 +322,13 @@ public: } void do_run() override { - // Execute the function - reshape_lhs.run(); - gemm.run(); + // Execute the functions + ITensorPack reshape_lhs_pack({{ACL_SRC, &lhs}, {ACL_DST, &lhs_reshaped}}); + reshape_lhs.run(reshape_lhs_pack); + + ITensorPack gemm_pack( + {{ACL_SRC_0, &lhs_reshaped}, {ACL_SRC_1, &rhs_reshaped}, {ACL_SRC_2, &bias}, {ACL_DST, &dst}}); + gemm.run(gemm_pack); // Make sure all the OpenCL jobs are done executing: CLScheduler::get().sync(); @@ -297,7 +353,7 @@ private: /** Main program for gemm reshaped test * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] M, [optional] N, [optional] K, [optional] B, [optional] m0, [optional] n0, [optional] k0, [optional] v0, [optional] h0, [optional] interleave_lhs, [optional] interleave_rhs, [optional] transpose_rhs ) + * @param[in] argv Arguments ( [optional] M, [optional] N, [optional] K, [optional] B, [optional] m0, [optional] n0, [optional] k0, [optional] v0, [optional] h0, [optional] interleave_lhs, [optional] interleave_rhs, [optional] transpose_rhs, [optional] export_to_cl_image ) */ int main(int argc, char **argv) { diff --git a/examples/gemm_tuner/cl_gemm_reshaped_rhs_only.cpp b/examples/gemm_tuner/cl_gemm_reshaped_rhs_only.cpp index 0d161aab2d..cfea2c9bac 100644 --- a/examples/gemm_tuner/cl_gemm_reshaped_rhs_only.cpp +++ b/examples/gemm_tuner/cl_gemm_reshaped_rhs_only.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019-2021, 2023 Arm Limited.
* * SPDX-License-Identifier: MIT * @@ -25,23 +25,25 @@ #error "This example needs to be built with -DARM_COMPUTE_CL" #endif /* ARM_COMPUTE_CL */ -#include "CommonGemmExampleOptions.h" -#include "arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyReshapedOnlyRHSKernel.h" #include "arm_compute/core/Helpers.h" #include "arm_compute/core/KernelDescriptors.h" #include "arm_compute/core/Types.h" #include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/runtime/CL/CLFunctions.h" #include "arm_compute/runtime/CL/CLScheduler.h" #include "arm_compute/runtime/CL/CLTuner.h" + +#include "src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.h" #include "tests/CL/Helper.h" -#include "utils/Utils.h" #include "utils/command_line/CommandLineOptions.h" #include "utils/command_line/CommandLineParser.h" +#include "utils/Utils.h" +#include "CommonGemmExampleOptions.h" +#include "GemmTunerHelpers.h" #include <cstdlib> using namespace arm_compute; +using namespace arm_compute::opencl::kernels; using namespace utils; using namespace arm_compute::misc::shape_calculator; using namespace gemm_tuner; @@ -51,12 +53,13 @@ namespace /** Structure holding all tunable gemm configs specific to this example/strategy */ struct GemmConfigs { - size_t m0{ 4 }; /**< Number of rows processed by the matrix multiplication */ - size_t n0{ 4 }; /**< Number of columns processed by the matrix multiplication */ - size_t k0{ 4 }; /**< Number of partial accumulations performed by the matrix multiplication */ - size_t h0{ 1 }; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */ - bool interleave_rhs{ true }; /**< Interleave rhs matrix */ - bool transpose_rhs{ true }; /**< Transpose rhs matrix */ + size_t m0{4}; /**< Number of rows processed by the matrix multiplication */ + size_t n0{4}; /**< Number of columns processed by the matrix multiplication */ + size_t k0{4}; /**< Number of partial accumulations performed by the matrix multiplication */ + size_t h0{1}; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */ + bool interleave_rhs{true}; /**< Interleave rhs matrix */ + bool transpose_rhs{true}; /**< Transpose rhs matrix */ + bool export_to_cl_image_rhs{true}; /**< Export rhs matrix to cl_image.*/ }; /** Formatted output of the GemmConfigs type @@ -77,6 +80,7 @@ struct GemmConfigs os << "h0 : " << configs.h0 << std::endl; os << "interleave_rhs : " << (configs.interleave_rhs ? true_str : false_str) << std::endl; os << "transpose_rhs : " << (configs.transpose_rhs ? true_str : false_str) << std::endl; + os << "export_to_cl_image_rhs : " << (configs.export_to_cl_image_rhs ? 
true_str : false_str) << std::endl; return os; } @@ -94,7 +98,8 @@ public: k0(parser.add_positional_option<SimpleOption<size_t>>("k0", 4)), h0(parser.add_positional_option<SimpleOption<size_t>>("h0", 1)), interleave_rhs(parser.add_positional_option<SimpleOption<size_t>>("interleave_rhs", 1)), - transpose_rhs(parser.add_positional_option<SimpleOption<size_t>>("transpose_rhs", 1)) + transpose_rhs(parser.add_positional_option<SimpleOption<size_t>>("transpose_rhs", 1)), + export_to_cl_image_rhs(parser.add_positional_option<SimpleOption<size_t>>("export_to_cl_image_rhs", 1)) { m0->set_help("Number of rows processed by the matrix multiplication"); n0->set_help("Number of columns processed by the matrix multiplication"); @@ -102,6 +107,8 @@ public: h0->set_help("Number of horizontal blocks of size (k0xn0) stored on the same output row"); interleave_rhs->set_help("Interleave rhs matrix (1) / Do not interleave rhs matrix (0)"); transpose_rhs->set_help("Transpose rhs matrix (1) / Do not transpose rhs matrix (0)"); + export_to_cl_image_rhs->set_help( + "Export rhs matrix to cl_image (1) / Do not export rhs matrix to cl_image (0)"); } /** Prevent instances of this class from being copied (As this class contains pointers) */ GemmConfigOptions(const GemmConfigOptions &) = delete; @@ -114,12 +121,13 @@ public: /** Default destructor */ ~GemmConfigOptions() = default; - SimpleOption<size_t> *m0; /**< Number of rows processed by the matrix multiplication option */ - SimpleOption<size_t> *n0; /**< Number of columns processed by the matrix multiplication option */ - SimpleOption<size_t> *k0; /**< Number of partial accumulations performed by the matrix multiplication option */ - SimpleOption<size_t> *h0; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row option */ - SimpleOption<size_t> *interleave_rhs; /**< Interleave rhs matrix option (1 enable; 0 disable) */ - SimpleOption<size_t> *transpose_rhs; /**< Transpose rhs matrix option (1 enable; 0 disable) */ + SimpleOption<size_t> *m0; /**< Number of rows processed by the matrix multiplication option */ + SimpleOption<size_t> *n0; /**< Number of columns processed by the matrix multiplication option */ + SimpleOption<size_t> *k0; /**< Number of partial accumulations performed by the matrix multiplication option */ + SimpleOption<size_t> *h0; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row option */ + SimpleOption<size_t> *interleave_rhs; /**< Interleave rhs matrix option (1 enable; 0 disable) */ + SimpleOption<size_t> *transpose_rhs; /**< Transpose rhs matrix option (1 enable; 0 disable) */ + SimpleOption<size_t> *export_to_cl_image_rhs; /**< Export rhs matrix to cl_image.*/ }; /** Consumes the gemm configuration options and creates a structure containing all information @@ -131,18 +139,19 @@ public: GemmConfigs consume_gemm_configs(const GemmConfigOptions &options) { GemmConfigs configs; - configs.m0 = options.m0->value(); - configs.n0 = options.n0->value(); - configs.k0 = options.k0->value(); - configs.h0 = options.h0->value(); - configs.interleave_rhs = options.interleave_rhs->value() != 0; - configs.transpose_rhs = options.transpose_rhs->value() != 0; + configs.m0 = options.m0->value(); + configs.n0 = options.n0->value(); + configs.k0 = options.k0->value(); + configs.h0 = options.h0->value(); + configs.interleave_rhs = options.interleave_rhs->value() != 0; + configs.transpose_rhs = options.transpose_rhs->value() != 0; + configs.export_to_cl_image_rhs = options.export_to_cl_image_rhs->value() != 0; 
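// Background on export_to_cl_image_rhs, assuming the usual cl_khr_image2d_from_buffer rules
// (this note and the arithmetic are illustrative, not part of the library): reading the
// reshaped RHS through the texture pipeline requires a device that can alias a buffer as a
// 2D image, a floating-point data type, typically n0 in {4, 8, 16} so one texel holds a
// whole block row, and a row pitch that is a multiple of the device's image pitch alignment.
// Padding the pitch is what GemmTunerHelpers' update_padding_for_cl_image() arranges before
// the kernels are validated, roughly:
//     padded_pitch = ((row_pitch + align - 1) / align) * align;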
return configs; } } // namespace -// Create function for CLGEMMMatrixMultiplyReshapedOnlyRHSKernel -using CLGEMMMatrixMultiplyReshapedOnlyRHS = test::CLSynthetizeFunction<CLGEMMMatrixMultiplyReshapedOnlyRHSKernel>; +// Create function for ClGemmMatrixMultiplyReshapedOnlyRhsKernel +using CLGEMMMatrixMultiplyReshapedOnlyRHS = test::CLSynthetizeOperator<ClGemmMatrixMultiplyReshapedOnlyRhsKernel>; class CLGEMMMatrixMultiplyReshapedOnlyRHSExample : public Example { @@ -150,10 +159,9 @@ public: bool do_setup(int argc, char **argv) override { // Default parameters - const DataType data_type = DataType::F32; - const float alpha = 1.0f; - const float beta = 0.0f; - const ActivationLayerInfo act_info = ActivationLayerInfo(); + const float alpha = 1.0f; + const float beta = 0.0f; + const ActivationLayerInfo act_info = ActivationLayerInfo(); CommonGemmExampleParams params; GemmConfigs configs; @@ -164,13 +172,13 @@ public: // Parse command line options parser.parse(argc, argv); - if(param_options.help->is_set() && param_options.help->value()) + if (param_options.help->is_set() && param_options.help->value()) { // Print help message parser.print_help(argv[0]); return false; } - if(!parser.validate()) + if (!parser.validate()) { // Invalid arguments. Use default parameters and configs std::cerr << "Invalid arguments." << std::endl; @@ -185,27 +193,30 @@ public: } // Print gemm parameters and configurations - std::cerr << "Gemm parameters:" << std::endl; - std::cerr << params << std::endl; - std::cerr << "Gemm configurations:" << std::endl; - std::cerr << configs << std::endl; + std::cout << "Gemm parameters:" << std::endl; + std::cout << params << std::endl; + std::cout << "Gemm configurations:" << std::endl; + std::cout << configs << std::endl; + + tuner.set_tuner_mode(params.tuner_mode); CLScheduler::get().default_init(&tuner); - lhs.allocator()->init(TensorInfo(TensorShape(params.K, params.M, params.B), 1, data_type)); - rhs.allocator()->init(TensorInfo(TensorShape(params.N, params.K, params.B), 1, data_type)); - bias.allocator()->init(TensorInfo(TensorShape(params.N, 1, params.B), 1, data_type)); + lhs.allocator()->init(TensorInfo(TensorShape(params.K, params.M, params.B), 1, params.data_type)); + rhs.allocator()->init(TensorInfo(TensorShape(params.N, params.K, params.B), 1, params.data_type)); + bias.allocator()->init(TensorInfo(TensorShape(params.N, 1, params.B), 1, params.data_type)); GEMMLHSMatrixInfo lhs_info; lhs_info.m0 = configs.m0; lhs_info.k0 = configs.k0; GEMMRHSMatrixInfo rhs_info; - rhs_info.n0 = configs.n0; - rhs_info.k0 = configs.k0; - rhs_info.h0 = configs.h0; - rhs_info.interleave = configs.interleave_rhs; - rhs_info.transpose = configs.transpose_rhs; + rhs_info.n0 = configs.n0; + rhs_info.k0 = configs.k0; + rhs_info.h0 = configs.h0; + rhs_info.interleave = configs.interleave_rhs; + rhs_info.transpose = configs.transpose_rhs; + rhs_info.export_to_cl_image = configs.export_to_cl_image_rhs; GEMMKernelInfo kernel_info; kernel_info.m = params.M; @@ -216,11 +227,39 @@ public: kernel_info.broadcast_bias = true; kernel_info.activation_info = act_info; + if (rhs_info.h0 == 0) + { + rhs_info.h0 = std::max(kernel_info.n / rhs_info.n0, 1U); + } + // Initialise rhs_reshaped tensor info - auto_init_if_empty(*rhs_reshaped.info(), rhs.info()->clone()->set_tensor_shape(compute_rhs_reshaped_shape(*rhs.info(), rhs_info))); + rhs_reshaped.allocator()->init( + TensorInfo(compute_rhs_reshaped_shape(*rhs.info(), rhs_info), 1, params.data_type)); + + if (rhs_info.export_to_cl_image) + { + if 
(!examples::gemm_tuner_helpers::update_padding_for_cl_image(rhs_reshaped.info())) + { + std::cerr << "cl_image is not supported on the device, disable export_to_cl_image" << std::endl; + return false; + } + } + + // Validate arguments + Status status{}; + status = gemm.validate(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, + rhs_info, kernel_info); + if (!status) + { + // Unsupported arguments + std::cerr << "Unsupported arguments." << std::endl; + std::cerr << "Check documentation for supported/unsupported combinations" << std::endl; + return false; + } // Configure function - gemm.configure(&lhs, &rhs_reshaped, &bias, &dst, alpha, beta, lhs_info, rhs_info, kernel_info); + gemm.configure(lhs.info(), rhs_reshaped.info(), bias.info(), dst.info(), alpha, beta, lhs_info, rhs_info, + kernel_info); // Allocate tensors lhs.allocator()->allocate(); @@ -234,7 +273,8 @@ public: void do_run() override { // Execute the function - gemm.run(); + ITensorPack gemm_pack({{ACL_SRC_0, &lhs}, {ACL_SRC_1, &rhs_reshaped}, {ACL_SRC_2, &bias}, {ACL_DST, &dst}}); + gemm.run(gemm_pack); // Make sure all the OpenCL jobs are done executing: CLScheduler::get().sync(); @@ -257,7 +297,7 @@ private: /** Main program for gemm reshaped rhs only test * * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] M, [optional] N, [optional] K, [optional] B, [optional] m0, [optional] n0, [optional] k0, [optional] h0, [optional] interleave_rhs, [optional] transpose_rhs ) + * @param[in] argv Arguments ( [optional] M, [optional] N, [optional] K, [optional] B, [optional] m0, [optional] n0, [optional] k0, [optional] h0, [optional] interleave_rhs, [optional] transpose_rhs, [optional] export_to_cl_image ) */ int main(int argc, char **argv) { diff --git a/examples/gemm_tuner/cl_gemmlowp_reshaped.cpp b/examples/gemm_tuner/cl_gemmlowp_reshaped.cpp new file mode 100644 index 0000000000..3808b98b7d --- /dev/null +++ b/examples/gemm_tuner/cl_gemmlowp_reshaped.cpp @@ -0,0 +1,335 @@ +/* + * Copyright (c) 2020-2021, 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
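The m0/n0/k0/v0/h0 options that keep reappearing in these tuner files all describe one blocked layout, so a compact sketch may help. It paraphrases compute_lhs_reshaped_shape()/compute_rhs_reshaped_shape() from arm_compute/core/utils/misc/ShapeCalculator.h and is meant as an illustration of the shape arithmetic, not as the library implementation:

    #include <cstddef>

    std::size_t ceil_div(std::size_t a, std::size_t b) { return (a + b - 1) / b; }

    // LHS (M x K) is tiled into m0 x k0 blocks; v0 vertical blocks share one output row.
    std::size_t lhs_reshaped_width(std::size_t K, std::size_t m0, std::size_t k0, std::size_t v0)
    {
        return m0 * k0 * ceil_div(K, k0) * v0;
    }
    std::size_t lhs_reshaped_height(std::size_t M, std::size_t m0, std::size_t v0)
    {
        return ceil_div(ceil_div(M, m0), v0);
    }

    // RHS (K x N) is tiled into k0 x n0 blocks; h0 horizontal blocks share one output row.
    std::size_t rhs_reshaped_width(std::size_t K, std::size_t n0, std::size_t k0, std::size_t h0)
    {
        return n0 * k0 * ceil_div(K, k0) * h0;
    }
    std::size_t rhs_reshaped_height(std::size_t N, std::size_t n0, std::size_t h0)
    {
        return ceil_div(ceil_div(N, n0), h0);
    }

Note also that the examples treat h0 == 0 as "use as many blocks per row as fit", falling back to h0 = max(N / n0, 1), as visible in the hunks above and below.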
+ */ +#ifndef ARM_COMPUTE_CL /* Needed by Utils.cpp to handle OpenCL exceptions properly */ +#error "This example needs to be built with -DARM_COMPUTE_CL" +#endif /* ARM_COMPUTE_CL */ + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/KernelDescriptors.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/CL/CLTuner.h" + +#include "examples/gemm_tuner/CommonGemmExampleOptions.h" +#include "examples/gemm_tuner/GemmTunerHelpers.h" +#include "src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedKernel.h" +#include "src/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.h" +#include "tests/CL/Helper.h" +#include "utils/command_line/CommandLineOptions.h" +#include "utils/command_line/CommandLineParser.h" +#include "utils/Utils.h" + +#include <cstdlib> + +using namespace arm_compute; +using namespace arm_compute::opencl::kernels; +using namespace utils; +using namespace arm_compute::misc::shape_calculator; +using namespace gemm_tuner; + +namespace +{ +/** Structure holding all tunable gemm configs specific to this example/strategy */ +struct GemmConfigs +{ + size_t m0{4}; /**< Number of rows processed by the matrix multiplication */ + size_t n0{4}; /**< Number of columns processed by the matrix multiplication */ + size_t k0{4}; /**< Number of partial accumulations performed by the matrix multiplication */ + size_t v0{1}; /**< Number of vertical blocks of size (m0xk0) stored on the same output row */ + size_t h0{1}; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */ + bool interleave_lhs{true}; /**< Interleave lhs matrix */ + bool transpose_lhs{true}; /**< Transpose lhs matrix. */ + bool interleave_rhs{true}; /**< Interleave rhs matrix */ + bool transpose_rhs{true}; /**< Transpose rhs matrix. */ +}; + +/** Formatted output of the GemmConfigs type + * + * @param[out] os Output stream. + * @param[in] configs Tunable configurations to output + * + * @return Modified output stream. + */ +::std::ostream &operator<<(::std::ostream &os, const GemmConfigs &configs) +{ + std::string false_str = std::string("false"); + std::string true_str = std::string("true"); + + os << "m0 : " << configs.m0 << std::endl; + os << "n0 : " << configs.n0 << std::endl; + os << "k0 : " << configs.k0 << std::endl; + os << "v0 : " << configs.v0 << std::endl; + os << "h0 : " << configs.h0 << std::endl; + os << "interleave_lhs : " << (configs.interleave_lhs ? true_str : false_str) << std::endl; + os << "transpose_lhs : " << (configs.transpose_lhs ? true_str : false_str) << std::endl; + os << "interleave_rhs : " << (configs.interleave_rhs ? true_str : false_str) << std::endl; + os << "transpose_rhs : " << (configs.transpose_rhs ? true_str : false_str) << std::endl; + return os; +} + +/** Command line options for gemm configs */ +class GemmConfigOptions +{ +public: + /** Constructor + * + * @param[in,out] parser A parser on which "parse()" hasn't been called yet. 
+ */ + GemmConfigOptions(CommandLineParser &parser) + : m0(parser.add_positional_option<SimpleOption<size_t>>("m0", 4)), + n0(parser.add_positional_option<SimpleOption<size_t>>("n0", 4)), + k0(parser.add_positional_option<SimpleOption<size_t>>("k0", 4)), + v0(parser.add_positional_option<SimpleOption<size_t>>("v0", 1)), + h0(parser.add_positional_option<SimpleOption<size_t>>("h0", 1)), + interleave_lhs(parser.add_positional_option<SimpleOption<size_t>>("interleave_lhs", 1)), + interleave_rhs(parser.add_positional_option<SimpleOption<size_t>>("interleave_rhs", 1)), + transpose_rhs(parser.add_positional_option<SimpleOption<size_t>>("transpose_rhs", 1)) + { + m0->set_help("Number of rows processed by the matrix multiplication"); + n0->set_help("Number of columns processed by the matrix multiplication"); + k0->set_help("Number of partial accumulations performed by the matrix multiplication"); + v0->set_help("Number of vertical blocks of size (m0xk0) stored on the same output row"); + h0->set_help("Number of horizontal blocks of size (k0xn0) stored on the same output row"); + interleave_lhs->set_help("Interleave lhs matrix (1) / Do not interleave lhs matrix (0)"); + interleave_rhs->set_help("Interleave rhs matrix (1) / Do not interleave rhs matrix (0)"); + // FIXME: Currently we only support 2 variants of the gemm reshaped kernels in which transpose_lhs and + // transpose_rhs are the opposites of each other. In the future we may extend the kernels to include the other + // 2 variants (both transposed and none transposed) + transpose_rhs->set_help("Transpose rhs matrix but not lhs matrix (1) / Do not transpose rhs matrix but do " + "transpose lhs matrix (0)"); + } + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GemmConfigOptions(const GemmConfigOptions &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GemmConfigOptions &operator=(const GemmConfigOptions &) = delete; + /** Allow instances of this class to be moved */ + GemmConfigOptions(GemmConfigOptions &&) = default; + /** Allow instances of this class to be moved */ + GemmConfigOptions &operator=(GemmConfigOptions &&) = default; + /** Default destructor */ + ~GemmConfigOptions() = default; + + SimpleOption<size_t> *m0; /**< Number of rows processed by the matrix multiplication option */ + SimpleOption<size_t> *n0; /**< Number of columns processed by the matrix multiplication option */ + SimpleOption<size_t> *k0; /**< Number of partial accumulations performed by the matrix multiplication option */ + SimpleOption<size_t> *v0; /**< Number of vertical blocks of size (m0xk0) stored on the same output row option */ + SimpleOption<size_t> *h0; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row option */ + SimpleOption<size_t> *interleave_lhs; /**< Interleave lhs matrix option (1 enable; 0 disable) */ + SimpleOption<size_t> *interleave_rhs; /**< Interleave rhs matrix option (1 enable; 0 disable) */ + // FIXME: Currently we only support 2 variants of the gemm reshaped kernels in which transpose_lhs and + // transpose_rhs are the opposites of each other. In the future we may extend the kernels to include the other + // 2 variants (both transposed and none transposed) + SimpleOption<size_t> * + transpose_rhs; /**< Transpose rhs matrix option (1 enable; 0 disable). Also set the lhs matrix transpose option to the opposite. 
*/ +}; + +/** Consumes the gemm configuration options and creates a structure containing all information + * + * @param[in] options Options to consume + * + * @return Structure containing the gemm configurations + */ +GemmConfigs consume_gemm_configs(const GemmConfigOptions &options) +{ + GemmConfigs configs; + configs.m0 = options.m0->value(); + configs.n0 = options.n0->value(); + configs.k0 = options.k0->value(); + configs.v0 = options.v0->value(); + configs.h0 = options.h0->value(); + configs.interleave_lhs = options.interleave_lhs->value() != 0; + // FIXME: Currently we only support 2 variants of the gemm reshaped kernels in which transpose_lhs and + // transpose_rhs are the opposites of each other. In the future we may extend the kernels to include the other + // 2 variants (both transposed and none transposed) + configs.transpose_lhs = options.transpose_rhs->value() == 0; + configs.interleave_rhs = options.interleave_rhs->value() != 0; + configs.transpose_rhs = options.transpose_rhs->value() != 0; + return configs; +} + +} // namespace + +using ClGemmReshapeLHSMatrix = test::CLSynthetizeOperator<ClGemmReshapeLhsMatrixKernel>; +using ClGemmLowpMatrixMultiplyReshaped = test::CLSynthetizeOperator<ClGemmLowpMatrixMultiplyReshapedKernel>; + +class CLGEMMLowpMatrixMultiplyReshapedExample : public Example +{ +public: + bool do_setup(int argc, char **argv) override + { + // Default parameters + CommonGemmExampleParams params; + GemmConfigs configs; + + // Parse command line options + CommandLineParser parser; + CommonGemmExampleOptions param_options(parser, DataType::QASYMM8); + GemmConfigOptions config_options(parser); + + parser.parse(argc, argv); + if (param_options.help->is_set() && param_options.help->value()) + { + parser.print_help(argv[0]); + return false; + } + if (!parser.validate()) + { + // Invalid arguments. Use default parameters and configs + std::cerr << "Invalid arguments." 
<< std::endl; + parser.print_help(argv[0]); + std::cerr << "Falling back to default parameters and configs" << std::endl; + } + else + { + params = consume_common_gemm_example_parameters(param_options); + configs = consume_gemm_configs(config_options); + } + + std::cout << "Gemm parameters:" << std::endl; + std::cout << params << std::endl; + std::cout << "Gemm configurations:" << std::endl; + std::cout << configs << std::endl; + + tuner.set_tuner_mode(params.tuner_mode); + + CLScheduler::get().default_init(&tuner); + + lhs.allocator()->init(TensorInfo(TensorShape(params.K, params.M, params.B), 1, params.data_type)); + rhs.allocator()->init(TensorInfo(TensorShape(params.N, params.K, params.B), 1, params.data_type)); + + // Set arbitrary quantization information + const QuantizationInfo q_info{0.012, 3}; + lhs.info()->set_quantization_info(q_info); + rhs.info()->set_quantization_info(q_info); + dst.info()->set_quantization_info(q_info); + + GEMMLHSMatrixInfo lhs_info; + lhs_info.m0 = configs.m0; + lhs_info.k0 = configs.k0; + lhs_info.v0 = configs.v0; + lhs_info.interleave = configs.interleave_lhs; + lhs_info.transpose = configs.transpose_lhs; + + GEMMRHSMatrixInfo rhs_info; + rhs_info.n0 = configs.n0; + rhs_info.k0 = configs.k0; + rhs_info.h0 = configs.h0; + rhs_info.interleave = configs.interleave_rhs; + rhs_info.transpose = configs.transpose_rhs; + rhs_info.export_to_cl_image = false; // CL image not supported for quantized cases yet + + if (rhs_info.h0 == 0) + { + rhs_info.h0 = std::max(static_cast<unsigned int>(params.N) / rhs_info.n0, 1U); + } + + lhs_reshaped.allocator()->init( + TensorInfo(compute_lhs_reshaped_shape(*lhs.info(), lhs_info), 1, params.data_type)); + rhs_reshaped.allocator()->init( + TensorInfo(compute_rhs_reshaped_shape(*rhs.info(), rhs_info), 1, params.data_type)); + lhs_reshaped.info()->set_quantization_info(q_info); + rhs_reshaped.info()->set_quantization_info(q_info); + + if (rhs_info.export_to_cl_image) + { + if (!examples::gemm_tuner_helpers::update_padding_for_cl_image(rhs_reshaped.info())) + { + std::cerr << "cl_image is not supported on the device, disable export_to_cl_image" << std::endl; + return false; + } + } + + GEMMReshapeInfo gemm_info{static_cast<int>(params.M), + static_cast<int>(params.N), + static_cast<int>(params.K), + static_cast<int>(configs.h0), + static_cast<int>(configs.v0), + 0, + false, + true}; + + // Validate arguments + if (!reshape_lhs.validate(lhs.info(), lhs_reshaped.info(), lhs_info, gemm_info.reinterpret_input_as_3d())) + { + std::cerr << "Invalid arguments for ClGemmReshapeLhsMatrixKernel." << std::endl; + return false; + } + + if (!gemm.validate(lhs_reshaped.info(), rhs_reshaped.info(), dst.info(), lhs_info, rhs_info, gemm_info)) + { + std::cerr << "Invalid arguments for ClGemmLowpMatrixMultiplyReshapedKernel."
<< std::endl; + return false; + } + + // Configure functions + reshape_lhs.configure(lhs.info(), lhs_reshaped.info(), lhs_info); + + gemm.configure(lhs_reshaped.info(), rhs_reshaped.info(), dst.info(), lhs_info, rhs_info, gemm_info); + + // Allocate tensors + lhs.allocator()->allocate(); + rhs.allocator()->allocate(); + lhs_reshaped.allocator()->allocate(); + rhs_reshaped.allocator()->allocate(); + dst.allocator()->allocate(); + + return true; + } + void do_run() override + { + ITensorPack reshape_lhs_pack({{ACL_SRC, &lhs}, {ACL_DST, &lhs_reshaped}}); + reshape_lhs.run(reshape_lhs_pack); + + ITensorPack gemm_pack({{ACL_SRC_0, &lhs_reshaped}, {ACL_SRC_1, &rhs_reshaped}, {ACL_DST, &dst}}); + gemm.run(gemm_pack); + + // Make sure all the OpenCL jobs are done executing: + CLScheduler::get().sync(); + } + + void do_teardown() override + { + } + +private: + CLTensor lhs{}; + CLTensor rhs{}; + CLTensor lhs_reshaped{}; + CLTensor rhs_reshaped{}; + CLTensor dst{}; + CLTuner tuner{}; + ClGemmReshapeLHSMatrix reshape_lhs{}; + ClGemmLowpMatrixMultiplyReshaped gemm{}; +}; + +/** Main test program for gemmlowp reshaped + * + * @param[in] argc Number of arguments + * @param[in] argv Arguments ( [optional] M, [optional] N, [optional] K, [optional] B, [optional] m0, [optional] n0, [optional] k0, [optional] v0, [optional] h0, [optional] interleave_lhs, [optional] interleave_rhs, [optional] transpose_rhs ) + */ +int main(int argc, char **argv) +{ + return run_example<CLGEMMLowpMatrixMultiplyReshapedExample>(argc, argv); +} diff --git a/examples/gemm_tuner/cl_gemmlowp_reshaped_rhs_only_fused_output_stage_fixedpoint.cpp b/examples/gemm_tuner/cl_gemmlowp_reshaped_rhs_only_fused_output_stage_fixedpoint.cpp new file mode 100644 index 0000000000..4acb316a3c --- /dev/null +++ b/examples/gemm_tuner/cl_gemmlowp_reshaped_rhs_only_fused_output_stage_fixedpoint.cpp @@ -0,0 +1,391 @@ +/* + * Copyright (c) 2020-2021, 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE.
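The file below benchmarks a quantized GEMM whose offset contributions and fixed-point requantization are fused into the matrix-multiply kernel. The standard gemmlowp identity, sketched here as background rather than code from this commit, is why the example conditionally prepares vector_sum_row / vector_sum_col:

    #include <cstddef>
    #include <cstdint>
    #include <vector>

    // With asymmetric quantization, expanding sum_k (lhs_ik - a_off) * (rhs_kj - b_off) gives
    //   acc_ij = sum_k lhs_ik * rhs_kj
    //            - b_off * row_sum(lhs)_i   // vector_sum_row, needed only when b_off != 0
    //            - a_off * col_sum(rhs)_j   // vector_sum_col, needed only when a_off != 0
    //            + K * a_off * b_off
    // A scalar reference for one output element, for intuition only:
    std::int32_t quantized_dot(const std::vector<std::uint8_t> &lhs_row,
                               const std::vector<std::uint8_t> &rhs_col,
                               std::int32_t a_off, std::int32_t b_off)
    {
        std::int32_t acc = 0;
        for (std::size_t k = 0; k < lhs_row.size(); ++k)
        {
            acc += (static_cast<std::int32_t>(lhs_row[k]) - a_off) *
                   (static_cast<std::int32_t>(rhs_col[k]) - b_off);
        }
        return acc;
    }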
+ */ +#ifndef ARM_COMPUTE_CL /* Needed by Utils.cpp to handle OpenCL exceptions properly */ +#error "This example needs to be built with -DARM_COMPUTE_CL" +#endif /* ARM_COMPUTE_CL */ + +#include "arm_compute/core/Helpers.h" +#include "arm_compute/core/KernelDescriptors.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/misc/ShapeCalculator.h" +#include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "arm_compute/runtime/CL/CLScheduler.h" +#include "arm_compute/runtime/CL/CLTuner.h" + +#include "src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel.h" +#include "src/gpu/cl/kernels/ClGemmLowpReductionKernel.h" +#include "tests/CL/Helper.h" +#include "utils/command_line/CommandLineOptions.h" +#include "utils/command_line/CommandLineParser.h" +#include "utils/Utils.h" + +#include "CommonGemmExampleOptions.h" +#include "GemmTunerHelpers.h" +#include <cstdlib> +#include <memory> + +using namespace arm_compute; +using namespace utils; +using namespace arm_compute::opencl::kernels; +using namespace arm_compute::misc::shape_calculator; +using namespace gemm_tuner; + +namespace +{ +/** Structure holding all tunable gemm configs specific to this example/strategy */ +struct GemmConfigs +{ + size_t m0{4}; /**< Number of rows processed by the matrix multiplication */ + size_t n0{4}; /**< Number of columns processed by the matrix multiplication */ + size_t k0{4}; /**< Number of partial accumulations performed by the matrix multiplication */ + size_t h0{1}; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row */ + bool interleave_rhs{true}; /**< Interleave rhs matrix */ + bool transpose_rhs{true}; /**< Transpose rhs matrix */ +}; + +/** Formatted output of the GemmConfigs type + * + * @param[out] os Output stream. + * @param[in] configs Tunable configurations to output + * + * @return Modified output stream. + */ +::std::ostream &operator<<(::std::ostream &os, const GemmConfigs &configs) +{ + std::string false_str = std::string("false"); + std::string true_str = std::string("true"); + + os << "m0 : " << configs.m0 << std::endl; + os << "n0 : " << configs.n0 << std::endl; + os << "k0 : " << configs.k0 << std::endl; + os << "h0 : " << configs.h0 << std::endl; + os << "interleave_rhs : " << (configs.interleave_rhs ? true_str : false_str) << std::endl; + os << "transpose_rhs : " << (configs.transpose_rhs ? true_str : false_str) << std::endl; + return os; +} + +/** Command line options for gemm configs */ +class GemmConfigOptions +{ +public: + /** Constructor + * + * @param[in,out] parser A parser on which "parse()" hasn't been called yet. 
+ */ + GemmConfigOptions(CommandLineParser &parser) + : m0(parser.add_positional_option<SimpleOption<size_t>>("m0", 4)), + n0(parser.add_positional_option<SimpleOption<size_t>>("n0", 4)), + k0(parser.add_positional_option<SimpleOption<size_t>>("k0", 4)), + h0(parser.add_positional_option<SimpleOption<size_t>>("h0", 1)), + interleave_rhs(parser.add_positional_option<SimpleOption<size_t>>("interleave_rhs", 1)), + transpose_rhs(parser.add_positional_option<SimpleOption<size_t>>("transpose_rhs", 1)) + { + m0->set_help("Number of rows processed by the matrix multiplication"); + n0->set_help("Number of columns processed by the matrix multiplication"); + k0->set_help("Number of partial accumulations performed by the matrix multiplication"); + h0->set_help("Number of horizontal blocks of size (k0xn0) stored on the same output row"); + interleave_rhs->set_help("Interleave rhs matrix (1) / Do not interleave rhs matrix (0)"); + transpose_rhs->set_help("Transpose rhs matrix (1) / Do not transpose rhs matrix (0)"); + } + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GemmConfigOptions(const GemmConfigOptions &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + GemmConfigOptions &operator=(const GemmConfigOptions &) = delete; + /** Allow instances of this class to be moved */ + GemmConfigOptions(GemmConfigOptions &&) = default; + /** Allow instances of this class to be moved */ + GemmConfigOptions &operator=(GemmConfigOptions &&) = default; + /** Default destructor */ + ~GemmConfigOptions() = default; + + SimpleOption<size_t> *m0; /**< Number of rows processed by the matrix multiplication option */ + SimpleOption<size_t> *n0; /**< Number of columns processed by the matrix multiplication option */ + SimpleOption<size_t> *k0; /**< Number of partial accumulations performed by the matrix multiplication option */ + SimpleOption<size_t> *h0; /**< Number of horizontal blocks of size (k0xn0) stored on the same output row option */ + SimpleOption<size_t> *interleave_rhs; /**< Interleave rhs matrix option (1 enable; 0 disable) */ + SimpleOption<size_t> *transpose_rhs; /**< Transpose rhs matrix option (1 enable; 0 disable) */ +}; + +/** Consumes the gemm configuration options and creates a structure containing all information + * + * @param[in] options Options to consume + * + * @return Structure containing the gemm configurations + */ +GemmConfigs consume_gemm_configs(const GemmConfigOptions &options) +{ + GemmConfigs configs; + configs.m0 = options.m0->value(); + configs.n0 = options.n0->value(); + configs.k0 = options.k0->value(); + configs.h0 = options.h0->value(); + configs.interleave_rhs = options.interleave_rhs->value() != 0; + configs.transpose_rhs = options.transpose_rhs->value() != 0; + return configs; +} + +} // namespace + +using ClGemmLowpMatrixMultiplyReshapedOnlyRhs = + test::CLSynthetizeOperator<ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel>; +using ClGemmLowpMatrixAReduction = test::CLSynthetizeOperator<ClGemmLowpMatrixAReductionKernel>; + +class CLGEMMLowpMatrixMultiplyReshapedOnlyRHSFusedOutputStageFixedpointExample : public Example +{ +public: + bool do_setup(int argc, char **argv) override + { + // Default parameters + CommonGemmExampleParams params; + GemmConfigs configs; + + // Parse command line options + CommandLineParser parser; + CommonGemmExampleOptions param_options(parser, DataType::QASYMM8); + GemmConfigOptions config_options(parser); + + parser.parse(argc, argv); + if 
(param_options.help->is_set() && param_options.help->value()) + { + parser.print_help(argv[0]); + return false; + } + if (!parser.validate()) + { + // Invalid arguments. Use default parameters and configs + std::cerr << "Invalid arguments." << std::endl; + parser.print_help(argv[0]); + std::cerr << "Falling back to default parameters and configs" << std::endl; + } + else + { + params = consume_common_gemm_example_parameters(param_options); + configs = consume_gemm_configs(config_options); + } + + std::cout << "Gemm parameters:" << std::endl; + std::cout << params << std::endl; + std::cout << "Gemm configurations:" << std::endl; + std::cout << configs << std::endl; + + tuner.set_tuner_mode(params.tuner_mode); + + CLScheduler::get().default_init(&tuner); + + lhs.allocator()->init(TensorInfo(TensorShape(params.K, params.M, params.B), 1, params.data_type)); + rhs.allocator()->init(TensorInfo(TensorShape(params.N, params.K, params.B), 1, params.data_type)); + bias.allocator()->init(TensorInfo(TensorShape(params.N), 1, DataType::S32)); + dst.allocator()->init(TensorInfo(TensorShape(params.N, params.M, params.B), 1, params.data_type)); + + // Set arbitrary quantization information (non-zero offset to ensure offset contribution stage is included) + // Could be extended in the future to include a user-controlled option for offset == 0 + const QuantizationInfo q_info{0.012, 3}; + lhs.info()->set_quantization_info(q_info); + rhs.info()->set_quantization_info(q_info); + bias.info()->set_quantization_info(q_info); + dst.info()->set_quantization_info(q_info); + + GEMMLHSMatrixInfo lhs_info; + lhs_info.m0 = configs.m0; + lhs_info.k0 = configs.k0; + + GEMMRHSMatrixInfo rhs_info; + rhs_info.n0 = configs.n0; + rhs_info.k0 = configs.k0; + rhs_info.h0 = configs.h0; + rhs_info.interleave = configs.interleave_rhs; + rhs_info.transpose = configs.transpose_rhs; + rhs_info.export_to_cl_image = false; // CL image not supported for quantized cases yet + + if (rhs_info.h0 == 0) + { + rhs_info.h0 = std::max(static_cast<unsigned int>(params.N) / rhs_info.n0, 1U); + } + + rhs_reshaped.allocator()->init( + TensorInfo(compute_rhs_reshaped_shape(*rhs.info(), rhs_info), 1, params.data_type)); + rhs_reshaped.info()->set_quantization_info(q_info); + if (rhs_info.export_to_cl_image) + { + if (!examples::gemm_tuner_helpers::update_padding_for_cl_image(rhs_reshaped.info())) + { + std::cerr << "cl_image is not supported on the device, disable export_to_cl_image" << std::endl; + return false; + } + } + + // Configure output stage for quantized case + GEMMLowpOutputStageInfo gemmlowp_output_stage; + gemmlowp_output_stage.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT; + gemmlowp_output_stage.output_data_type = dst.info()->data_type(); + gemmlowp_output_stage.gemmlowp_offset = 0; + { + gemmlowp_output_stage.is_quantized_per_channel = false; + // Num_filters is 1 unless quantized type is of per_channel type. Could be extended in the future to support per-channel quantization. 
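As context for the output-stage setup that follows, here is a minimal sketch of the decomposition performed per filter by quantization::compute_quantized_multipliers_and_shifts(), assuming the usual gemmlowp convention and a rescale factor below 1; it is illustrative, not the library code. The real factor (lhs_scale * rhs_scale) / dst_scale becomes an integer multiplier plus shift so the fused stage needs no floating point:

    #include <cmath>
    #include <cstdint>

    // Decompose real_multiplier (assumed < 1) into quant_multiplier * 2^(-31 - right_shift),
    // where quant_multiplier is a Q0.31 fixed-point value.
    void decompose_multiplier(double real_multiplier, std::int32_t &quant_multiplier, std::int32_t &right_shift)
    {
        int exponent = 0;
        const double significand = std::frexp(real_multiplier, &exponent); // in [0.5, 1)
        right_shift = -exponent;
        std::int64_t q = std::llround(significand * (1ll << 31));
        if (q == (1ll << 31)) // rounding can push the significand up to exactly 2^31
        {
            q /= 2;
            --right_shift;
        }
        quant_multiplier = static_cast<std::int32_t>(q);
    }

    // The QUANTIZE_DOWN_FIXEDPOINT stage then computes, per int32 accumulator, roughly:
    //   dst = clamp((rounding_doubling_high_mul(acc, quant_multiplier) >> right_shift)
    //               + gemmlowp_offset, gemmlowp_min_bound, gemmlowp_max_bound)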
+ const unsigned int num_filters = 1; + + dst_multipliers.allocator()->init(TensorInfo(TensorShape(num_filters), 1, DataType::S32)); + dst_shifts.allocator()->init(TensorInfo(TensorShape(num_filters), 1, DataType::S32)); + + gemmlowp_output_stage.gemmlowp_multipliers.resize(num_filters); + gemmlowp_output_stage.gemmlowp_shifts.resize(num_filters); + quantization::compute_quantized_multipliers_and_shifts(lhs.info(), rhs.info(), dst.info(), + gemmlowp_output_stage.gemmlowp_multipliers.data(), + gemmlowp_output_stage.gemmlowp_shifts.data()); + gemmlowp_output_stage.gemmlowp_multiplier = gemmlowp_output_stage.gemmlowp_multipliers[0]; + gemmlowp_output_stage.gemmlowp_shift = gemmlowp_output_stage.gemmlowp_shifts[0]; + + // No fused activation + PixelValue min_val{}; + PixelValue max_val{}; + std::tie(min_val, max_val) = get_min_max(dst.info()->data_type()); + + auto min_activation = min_val.get<int32_t>(); + auto max_activation = max_val.get<int32_t>(); + + // Set the GEMMLowp output stage info + gemmlowp_output_stage.gemmlowp_offset = dst.info()->quantization_info().uniform().offset; + gemmlowp_output_stage.gemmlowp_min_bound = min_activation; + gemmlowp_output_stage.gemmlowp_max_bound = max_activation; + } + + GEMMKernelInfo gemm_info; + gemm_info.m = params.M; + gemm_info.n = params.N; + gemm_info.k = params.K; + gemm_info.depth_output_gemm3d = 0; + gemm_info.reinterpret_input_as_3d = false; + gemm_info.broadcast_bias = true; + gemm_info.fp_mixed_precision = false; + gemm_info.has_pad_y = false; + gemm_info.mult_transpose1xW_width = configs.h0; + gemm_info.lhs_info = lhs_info; + gemm_info.rhs_info = rhs_info; + gemm_info.a_offset = lhs.info()->quantization_info().uniform().offset; + gemm_info.b_offset = rhs.info()->quantization_info().uniform().offset; + gemm_info.output_stage = gemmlowp_output_stage; + + // Initialize Matrix A reduction kernel only if _b_offset is not equal to 0 + if (gemm_info.b_offset != 0) + { + const TensorInfo info_vector_sum_row(compute_reductionB_shape(*lhs.info()), 1, DataType::S32); + vector_sum_row.allocator()->init(info_vector_sum_row); + + mtx_a_reduction = std::make_unique<ClGemmLowpMatrixAReduction>(); + + if (!mtx_a_reduction->validate(lhs.info(), vector_sum_row.info(), GEMMLowpReductionKernelInfo{})) + { + std::cerr << "Invalid arguments for ClGemmLowpMatrixAReductionKernel." << std::endl; + return false; + } + + mtx_a_reduction->configure(lhs.info(), vector_sum_row.info(), GEMMLowpReductionKernelInfo{}); + } + // Initialize matrix B reduction kernel only if _a_offset is not equal to 0 + if (gemm_info.a_offset != 0) + { + const TensorInfo info_vector_sum_col(compute_reductionA_shape(*rhs.info()), 1, DataType::S32); + vector_sum_col.allocator()->init(info_vector_sum_col); + // There's no need for a Matrix B reduction kernel as this is assumed to be run only once in the prepare stage + } + + // Validate arguments + if (!gemm.validate(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info, + gemm_info.a_offset == 0 ? nullptr : vector_sum_col.info(), + gemm_info.b_offset == 0 ? nullptr : vector_sum_row.info(), bias.info(), + dst_multipliers.info(), dst_shifts.info())) + { + std::cerr << "Invalid arguments for ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel." << std::endl; + return false; + } + + // Configure function + gemm.configure(lhs.info(), rhs_reshaped.info(), dst.info(), gemm_info, + gemm_info.a_offset == 0 ? nullptr : vector_sum_col.info(), + gemm_info.b_offset == 0 ?
nullptr : vector_sum_row.info(), bias.info(), dst_multipliers.info(), + dst_shifts.info()); + + // Allocate tensors + lhs.allocator()->allocate(); + rhs.allocator()->allocate(); + rhs_reshaped.allocator()->allocate(); + bias.allocator()->allocate(); + dst.allocator()->allocate(); + vector_sum_col.allocator()->allocate(); + vector_sum_row.allocator()->allocate(); + dst_multipliers.allocator()->allocate(); + dst_shifts.allocator()->allocate(); + + return true; + } + void do_run() override + { + if (mtx_a_reduction != nullptr) + { + // The reduction kernel is configured with vector_sum_row as its destination + ITensorPack red_pack({{ACL_SRC, &lhs}, {ACL_DST, &vector_sum_row}}); + mtx_a_reduction->run(red_pack); + } + + // The gemm kernel is configured with the reshaped RHS as its second source + ITensorPack gemm_pack({{ACL_SRC_0, &lhs}, + {ACL_SRC_1, &rhs_reshaped}, + {ACL_BIAS, &bias}, + {ACL_VEC_COL_SUM, &vector_sum_col}, + {ACL_VEC_ROW_SUM, &vector_sum_row}, + {ACL_SHIFTS, &dst_shifts}, + {ACL_MULTIPLIERS, &dst_multipliers}, + {ACL_DST, &dst}}); + gemm.run(gemm_pack); + + // Make sure all the OpenCL jobs are done executing: + CLScheduler::get().sync(); + } + + void do_teardown() override + { + } + +private: + CLTensor lhs{}; + CLTensor rhs{}; + CLTensor rhs_reshaped{}; + CLTensor bias{}; + CLTensor dst{}; + CLTensor vector_sum_col{}; + CLTensor vector_sum_row{}; + CLTensor dst_multipliers{}; + CLTensor dst_shifts{}; + CLTuner tuner{}; + ClGemmLowpMatrixMultiplyReshapedOnlyRhs gemm{}; + std::unique_ptr<ClGemmLowpMatrixAReduction> mtx_a_reduction{nullptr}; +}; + +/** Main test program for gemmlowp reshaped rhs only with fused output stage fixedpoint + * + * @param[in] argc Number of arguments + * @param[in] argv Arguments ( [optional] M, [optional] N, [optional] K, [optional] B, [optional] m0, [optional] n0, [optional] k0, [optional] h0, [optional] interleave_rhs, [optional] transpose_rhs ) + */ +int main(int argc, char **argv) +{ + return run_example<CLGEMMLowpMatrixMultiplyReshapedOnlyRHSFusedOutputStageFixedpointExample>(argc, argv); +} diff --git a/examples/graph_alexnet.cpp b/examples/graph_alexnet.cpp index 25ede6dca2..be0b8a7d8a 100644 --- a/examples/graph_alexnet.cpp +++ b/examples/graph_alexnet.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited.
* * SPDX-License-Identifier: MIT * @@ -39,8 +39,7 @@ using namespace arm_compute::graph_utils; class GraphAlexnetExample : public Example { public: - GraphAlexnetExample() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "AlexNet") + GraphAlexnetExample() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "AlexNet") { } bool do_setup(int argc, char **argv) override @@ -53,14 +52,15 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; } // Checks - ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "QASYMM8 not supported for this graph"); + ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), + "QASYMM8 not supported for this graph"); // Print parameter values std::cout << common_params << std::endl; @@ -69,88 +69,80 @@ public: std::string data_path = common_params.data_path; // Create a preprocessor object - const std::array<float, 3> mean_rgb{ { 122.68f, 116.67f, 104.01f } }; - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb); + const std::array<float, 3> mean_rgb{{122.68f, 116.67f, 104.01f}}; + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<CaffePreproccessor>(mean_rgb); // Create input descriptor const auto operation_layout = common_params.data_layout; - const TensorShape tensor_shape = permute_shape(TensorShape(227U, 227U, 3U, 1U), DataLayout::NCHW, operation_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(227U, 227U, 3U, common_params.batches), DataLayout::NCHW, operation_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NCHW; - graph << common_params.target - << common_params.fast_math_hint - << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor))) - // Layer 1 - << ConvolutionLayer( - 11U, 11U, 96U, - get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv1_b.npy"), - PadStrideInfo(4, 4, 0, 0)) - .set_name("conv1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu1") - << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)).set_name("norm1") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0))).set_name("pool1") - // Layer 2 - << ConvolutionLayer( - 5U, 5U, 256U, - get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv2_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv2_b.npy"), - PadStrideInfo(1, 1, 2, 2), 2) - .set_name("conv2") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu2") - << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)).set_name("norm2") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0))).set_name("pool2") - // Layer 3 - << ConvolutionLayer( - 3U, 3U, 384U, - 
get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv3_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv3_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv3") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu3") - // Layer 4 - << ConvolutionLayer( - 3U, 3U, 384U, - get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv4_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv4_b.npy"), - PadStrideInfo(1, 1, 1, 1), 2) - .set_name("conv4") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu4") - // Layer 5 - << ConvolutionLayer( - 3U, 3U, 256U, - get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv5_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv5_b.npy"), - PadStrideInfo(1, 1, 1, 1), 2) - .set_name("conv5") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu5") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0))).set_name("pool5") - // Layer 6 - << FullyConnectedLayer( - 4096U, - get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc6_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc6_b.npy")) - .set_name("fc6") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu6") - // Layer 7 - << FullyConnectedLayer( - 4096U, - get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc7_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc7_b.npy")) - .set_name("fc7") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu7") - // Layer 8 - << FullyConnectedLayer( - 1000U, - get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc8_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc8_b.npy")) - .set_name("fc8") - // Softmax - << SoftmaxLayer().set_name("prob") - << OutputLayer(get_output_accessor(common_params, 5)); + graph + << common_params.target << common_params.fast_math_hint + << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor))) + // Layer 1 + << ConvolutionLayer(11U, 11U, 96U, + get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv1_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv1_b.npy"), + PadStrideInfo(4, 4, 0, 0)) + .set_name("conv1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu1") + << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)).set_name("norm1") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0))) + .set_name("pool1") + // Layer 2 + << ConvolutionLayer( + 5U, 5U, 256U, get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv2_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv2_b.npy"), PadStrideInfo(1, 1, 2, 2), 2) + .set_name("conv2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu2") + << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)).set_name("norm2") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0))) + .set_name("pool2") 
+ // Layer 3 + << ConvolutionLayer( + 3U, 3U, 384U, get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv3_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv3_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu3") + // Layer 4 + << ConvolutionLayer( + 3U, 3U, 384U, get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv4_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv4_b.npy"), PadStrideInfo(1, 1, 1, 1), 2) + .set_name("conv4") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu4") + // Layer 5 + << ConvolutionLayer( + 3U, 3U, 256U, get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv5_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/alexnet_model/conv5_b.npy"), PadStrideInfo(1, 1, 1, 1), 2) + .set_name("conv5") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu5") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0))) + .set_name("pool5") + // Layer 6 + << FullyConnectedLayer(4096U, + get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc6_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc6_b.npy")) + .set_name("fc6") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu6") + // Layer 7 + << FullyConnectedLayer(4096U, + get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc7_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc7_b.npy")) + .set_name("fc7") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu7") + // Layer 8 + << FullyConnectedLayer(1000U, + get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc8_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/alexnet_model/fc8_b.npy")) + .set_name("fc8") + // Softmax + << SoftmaxLayer().set_name("prob") << OutputLayer(get_output_accessor(common_params, 5)); // Finalize graph GraphConfig config; @@ -159,10 +151,11 @@ public: config.use_tuner = common_params.enable_tuner; config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; // Load the precompiled kernels from a file into the kernel library; this way, the next time they are needed, // compilation won't be required. - if(common_params.enable_cl_cache) + if (common_params.enable_cl_cache) { #ifdef ARM_COMPUTE_CL restore_program_cache_from_file(); @@ -172,7 +165,7 @@ public: graph.finalize(common_params.target, config); // Save the OpenCL kernels to a file - if(common_opts.enable_cl_cache) + if (common_opts.enable_cl_cache) { #ifdef ARM_COMPUTE_CL save_program_cache_to_file(); diff --git a/examples/graph_deepspeech_v0_4_1.cpp b/examples/graph_deepspeech_v0_4_1.cpp index b655452391..08cd4a47b1 100644 --- a/examples/graph_deepspeech_v0_4_1.cpp +++ b/examples/graph_deepspeech_v0_4_1.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019-2020 ARM Limited. + * Copyright (c) 2019-2021 Arm Limited.
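The hunk above threads the new config.mlgo_file option into the AlexNet example next to the existing tuner settings. Distilled from those hunks, the finalize-with-kernel-cache flow shared by these graph examples is sketched below; this is an editorial summary, not part of the patch, assuming a build with ARM_COMPUTE_CL defined and using only names that already appear in the diff (graph, common_params, GraphConfig, and the cache helpers from the examples' utils):

    // Sketch (not part of the patch): configure, restore cache, finalize, save cache.
    GraphConfig config;
    config.num_threads = common_params.threads;      // CPU scheduler threads
    config.use_tuner   = common_params.enable_tuner; // enable the OpenCL tuner
    config.tuner_mode  = common_params.tuner_mode;
    config.tuner_file  = common_params.tuner_file;   // tuning results on disk
    config.mlgo_file   = common_params.mlgo_file;    // MLGO heuristics file (newly wired in)

    // Restore previously compiled OpenCL kernels so finalize() can skip recompiling them.
    if (common_params.enable_cl_cache)
    {
#ifdef ARM_COMPUTE_CL
        restore_program_cache_from_file();
#endif /* ARM_COMPUTE_CL */
    }

    graph.finalize(common_params.target, config);

    // Persist the kernels compiled during finalize() for the next run.
    if (common_params.enable_cl_cache)
    {
#ifdef ARM_COMPUTE_CL
        save_program_cache_to_file();
#endif /* ARM_COMPUTE_CL */
    }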
* * SPDX-License-Identifier: MIT * @@ -23,6 +23,7 @@ */ #include "arm_compute/graph.h" #include "arm_compute/graph/Types.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -37,8 +38,7 @@ using namespace arm_compute::graph_utils; class GraphDeepSpeechExample : public Example { public: - GraphDeepSpeechExample() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "DeepSpeech v0.4.1") + GraphDeepSpeechExample() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "DeepSpeech v0.4.1") { } bool do_setup(int argc, char **argv) override @@ -51,7 +51,7 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; @@ -64,7 +64,7 @@ public: std::string data_path = common_params.data_path; const std::string model_path = "/cnn_data/deepspeech_model/"; - if(!data_path.empty()) + if (!data_path.empty()) { data_path += model_path; } @@ -77,141 +77,143 @@ public: const float cell_clip = 20.f; // Create input descriptor - const TensorShape tensor_shape = permute_shape(TensorShape(26U, 19U, n_steps, 1U), DataLayout::NHWC, common_params.data_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(26U, 19U, n_steps, 1U), DataLayout::NHWC, common_params.data_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NHWC; - graph << common_params.target - << common_params.fast_math_hint + graph << common_params.target << common_params.fast_math_hint << InputLayer(input_descriptor, - get_weights_accessor(data_path, "input_values_x" + std::to_string(n_steps) + ".npy", weights_layout)) - .set_name("input_node"); + get_weights_accessor(data_path, "input_values_x" + std::to_string(n_steps) + ".npy", + weights_layout)) + .set_name("input_node"); - if(common_params.data_layout == DataLayout::NCHW) + if (common_params.data_layout == DataLayout::NCHW) { graph << PermuteLayer(PermutationVector(2U, 0U, 1U), common_params.data_layout).set_name("permute_to_nhwc"); } graph << ReshapeLayer(TensorShape(494U, n_steps)).set_name("Reshape_input") // Layer 1 - << FullyConnectedLayer( - 2048U, - get_weights_accessor(data_path, "h1_transpose.npy", weights_layout), - get_weights_accessor(data_path, "MatMul_bias.npy")) - .set_name("fc0") + << FullyConnectedLayer(2048U, get_weights_accessor(data_path, "h1_transpose.npy", weights_layout), + get_weights_accessor(data_path, "MatMul_bias.npy")) + .set_name("fc0") << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, cell_clip)) - .set_name("Relu") + .set_name("Relu") // Layer 2 - << FullyConnectedLayer( - 2048U, - get_weights_accessor(data_path, "h2_transpose.npy", weights_layout), - get_weights_accessor(data_path, "MatMul_1_bias.npy")) - .set_name("fc1") + << FullyConnectedLayer(2048U, get_weights_accessor(data_path, "h2_transpose.npy", weights_layout), + get_weights_accessor(data_path, "MatMul_1_bias.npy")) + .set_name("fc1") << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, cell_clip)) - .set_name("Relu_1") + .set_name("Relu_1") // Layer 3 - << 
FullyConnectedLayer( - 2048U, - get_weights_accessor(data_path, "h3_transpose.npy", weights_layout), - get_weights_accessor(data_path, "MatMul_2_bias.npy")) - .set_name("fc2") + << FullyConnectedLayer(2048U, get_weights_accessor(data_path, "h3_transpose.npy", weights_layout), + get_weights_accessor(data_path, "MatMul_2_bias.npy")) + .set_name("fc2") << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, cell_clip)) - .set_name("Relu_2") + .set_name("Relu_2") // Layer 4 << ReshapeLayer(TensorShape(2048U, 1U, n_steps)).set_name("Reshape_1"); // Unstack Layer (using SplitLayerNode) - NodeParams unstack_params = { "unstack", graph.hints().target_hint }; - NodeID unstack_nid = GraphBuilder::add_split_node(graph.graph(), unstack_params, { graph.tail_node(), 0 }, n_steps, 2); + NodeParams unstack_params = {"unstack", graph.hints().target_hint}; + NodeID unstack_nid = + GraphBuilder::add_split_node(graph.graph(), unstack_params, {graph.tail_node(), 0}, n_steps, 2); // Create input state descriptor - TensorDescriptor state_descriptor = TensorDescriptor(TensorShape(2048U), common_params.data_type).set_layout(common_params.data_layout); - SubStream previous_state(graph); - SubStream add_y(graph); + TensorDescriptor state_descriptor = + TensorDescriptor(TensorShape(2048U), common_params.data_type).set_layout(common_params.data_layout); + SubStream previous_state(graph); + SubStream add_y(graph); // Initial state for LSTM is all zeroes for both state_h and state_c; therefore, only one input is created - previous_state << InputLayer(state_descriptor, - get_weights_accessor(data_path, "zeros.npy")) - .set_name("previous_state_c_h"); - add_y << InputLayer(state_descriptor, - get_weights_accessor(data_path, "ones.npy")) - .set_name("add_y"); + previous_state << InputLayer(state_descriptor, get_weights_accessor(data_path, "zeros.npy")) + .set_name("previous_state_c_h"); + add_y << InputLayer(state_descriptor, get_weights_accessor(data_path, "ones.npy")).set_name("add_y"); // Create LSTM Fully Connected weights and bias descriptors - TensorDescriptor lstm_weights_descriptor = TensorDescriptor(TensorShape(4096U, 8192U), common_params.data_type).set_layout(common_params.data_layout); - TensorDescriptor lstm_bias_descriptor = TensorDescriptor(TensorShape(8192U), common_params.data_type).set_layout(common_params.data_layout); - SubStream lstm_fc_weights(graph); - SubStream lstm_fc_bias(graph); - lstm_fc_weights << ConstantLayer(lstm_weights_descriptor, - get_weights_accessor(data_path, "rnn_lstm_cell_kernel_transpose.npy", weights_layout)) - .set_name("h5/transpose"); + TensorDescriptor lstm_weights_descriptor = + TensorDescriptor(TensorShape(4096U, 8192U), common_params.data_type).set_layout(common_params.data_layout); + TensorDescriptor lstm_bias_descriptor = + TensorDescriptor(TensorShape(8192U), common_params.data_type).set_layout(common_params.data_layout); + SubStream lstm_fc_weights(graph); + SubStream lstm_fc_bias(graph); + lstm_fc_weights << ConstantLayer( + lstm_weights_descriptor, + get_weights_accessor(data_path, "rnn_lstm_cell_kernel_transpose.npy", weights_layout)) + .set_name("h5/transpose"); lstm_fc_bias << ConstantLayer(lstm_bias_descriptor, get_weights_accessor(data_path, "rnn_lstm_cell_MatMul_bias.npy")) - .set_name("MatMul_3_bias"); + .set_name("MatMul_3_bias"); // LSTM Block - std::pair<SubStream, SubStream> new_state_1 = add_lstm_cell(unstack_nid, 0, previous_state, previous_state, add_y, lstm_fc_weights, lstm_fc_bias); - std::pair<SubStream, SubStream>
new_state_2 = add_lstm_cell(unstack_nid, 1, new_state_1.first, new_state_1.second, add_y, lstm_fc_weights, lstm_fc_bias); - std::pair<SubStream, SubStream> new_state_3 = add_lstm_cell(unstack_nid, 2, new_state_2.first, new_state_2.second, add_y, lstm_fc_weights, lstm_fc_bias); - std::pair<SubStream, SubStream> new_state_4 = add_lstm_cell(unstack_nid, 3, new_state_3.first, new_state_3.second, add_y, lstm_fc_weights, lstm_fc_bias); - std::pair<SubStream, SubStream> new_state_5 = add_lstm_cell(unstack_nid, 4, new_state_4.first, new_state_4.second, add_y, lstm_fc_weights, lstm_fc_bias); - std::pair<SubStream, SubStream> new_state_6 = add_lstm_cell(unstack_nid, 5, new_state_5.first, new_state_5.second, add_y, lstm_fc_weights, lstm_fc_bias); - std::pair<SubStream, SubStream> new_state_7 = add_lstm_cell(unstack_nid, 6, new_state_6.first, new_state_6.second, add_y, lstm_fc_weights, lstm_fc_bias); - std::pair<SubStream, SubStream> new_state_8 = add_lstm_cell(unstack_nid, 7, new_state_7.first, new_state_7.second, add_y, lstm_fc_weights, lstm_fc_bias); - std::pair<SubStream, SubStream> new_state_9 = add_lstm_cell(unstack_nid, 8, new_state_8.first, new_state_8.second, add_y, lstm_fc_weights, lstm_fc_bias); - std::pair<SubStream, SubStream> new_state_10 = add_lstm_cell(unstack_nid, 9, new_state_9.first, new_state_9.second, add_y, lstm_fc_weights, lstm_fc_bias); - std::pair<SubStream, SubStream> new_state_11 = add_lstm_cell(unstack_nid, 10, new_state_10.first, new_state_10.second, add_y, lstm_fc_weights, lstm_fc_bias); - std::pair<SubStream, SubStream> new_state_12 = add_lstm_cell(unstack_nid, 11, new_state_11.first, new_state_11.second, add_y, lstm_fc_weights, lstm_fc_bias); - std::pair<SubStream, SubStream> new_state_13 = add_lstm_cell(unstack_nid, 12, new_state_12.first, new_state_12.second, add_y, lstm_fc_weights, lstm_fc_bias); - std::pair<SubStream, SubStream> new_state_14 = add_lstm_cell(unstack_nid, 13, new_state_13.first, new_state_13.second, add_y, lstm_fc_weights, lstm_fc_bias); - std::pair<SubStream, SubStream> new_state_15 = add_lstm_cell(unstack_nid, 14, new_state_14.first, new_state_14.second, add_y, lstm_fc_weights, lstm_fc_bias); - std::pair<SubStream, SubStream> new_state_16 = add_lstm_cell(unstack_nid, 15, new_state_15.first, new_state_15.second, add_y, lstm_fc_weights, lstm_fc_bias); + std::pair<SubStream, SubStream> new_state_1 = + add_lstm_cell(unstack_nid, 0, previous_state, previous_state, add_y, lstm_fc_weights, lstm_fc_bias); + std::pair<SubStream, SubStream> new_state_2 = + add_lstm_cell(unstack_nid, 1, new_state_1.first, new_state_1.second, add_y, lstm_fc_weights, lstm_fc_bias); + std::pair<SubStream, SubStream> new_state_3 = + add_lstm_cell(unstack_nid, 2, new_state_2.first, new_state_2.second, add_y, lstm_fc_weights, lstm_fc_bias); + std::pair<SubStream, SubStream> new_state_4 = + add_lstm_cell(unstack_nid, 3, new_state_3.first, new_state_3.second, add_y, lstm_fc_weights, lstm_fc_bias); + std::pair<SubStream, SubStream> new_state_5 = + add_lstm_cell(unstack_nid, 4, new_state_4.first, new_state_4.second, add_y, lstm_fc_weights, lstm_fc_bias); + std::pair<SubStream, SubStream> new_state_6 = + add_lstm_cell(unstack_nid, 5, new_state_5.first, new_state_5.second, add_y, lstm_fc_weights, lstm_fc_bias); + std::pair<SubStream, SubStream> new_state_7 = + add_lstm_cell(unstack_nid, 6, new_state_6.first, new_state_6.second, add_y, lstm_fc_weights, lstm_fc_bias); + std::pair<SubStream, SubStream> new_state_8 = + add_lstm_cell(unstack_nid, 7, new_state_7.first, new_state_7.second, 
add_y, lstm_fc_weights, lstm_fc_bias); + std::pair<SubStream, SubStream> new_state_9 = + add_lstm_cell(unstack_nid, 8, new_state_8.first, new_state_8.second, add_y, lstm_fc_weights, lstm_fc_bias); + std::pair<SubStream, SubStream> new_state_10 = + add_lstm_cell(unstack_nid, 9, new_state_9.first, new_state_9.second, add_y, lstm_fc_weights, lstm_fc_bias); + std::pair<SubStream, SubStream> new_state_11 = add_lstm_cell( + unstack_nid, 10, new_state_10.first, new_state_10.second, add_y, lstm_fc_weights, lstm_fc_bias); + std::pair<SubStream, SubStream> new_state_12 = add_lstm_cell( + unstack_nid, 11, new_state_11.first, new_state_11.second, add_y, lstm_fc_weights, lstm_fc_bias); + std::pair<SubStream, SubStream> new_state_13 = add_lstm_cell( + unstack_nid, 12, new_state_12.first, new_state_12.second, add_y, lstm_fc_weights, lstm_fc_bias); + std::pair<SubStream, SubStream> new_state_14 = add_lstm_cell( + unstack_nid, 13, new_state_13.first, new_state_13.second, add_y, lstm_fc_weights, lstm_fc_bias); + std::pair<SubStream, SubStream> new_state_15 = add_lstm_cell( + unstack_nid, 14, new_state_14.first, new_state_14.second, add_y, lstm_fc_weights, lstm_fc_bias); + std::pair<SubStream, SubStream> new_state_16 = add_lstm_cell( + unstack_nid, 15, new_state_15.first, new_state_15.second, add_y, lstm_fc_weights, lstm_fc_bias); // Concatenate new states on height const int axis = 1; - graph << StackLayer(axis, - std::move(new_state_1.second), - std::move(new_state_2.second), - std::move(new_state_3.second), - std::move(new_state_4.second), - std::move(new_state_5.second), - std::move(new_state_6.second), - std::move(new_state_7.second), - std::move(new_state_8.second), - std::move(new_state_9.second), - std::move(new_state_10.second), - std::move(new_state_11.second), - std::move(new_state_12.second), - std::move(new_state_13.second), - std::move(new_state_14.second), - std::move(new_state_15.second), - std::move(new_state_16.second)) - .set_name("concat"); - - graph << FullyConnectedLayer( - 2048U, - get_weights_accessor(data_path, "h5_transpose.npy", weights_layout), - get_weights_accessor(data_path, "MatMul_3_bias.npy")) - .set_name("fc3") + graph << StackLayer(axis, std::move(new_state_1.second), std::move(new_state_2.second), + std::move(new_state_3.second), std::move(new_state_4.second), std::move(new_state_5.second), + std::move(new_state_6.second), std::move(new_state_7.second), std::move(new_state_8.second), + std::move(new_state_9.second), std::move(new_state_10.second), + std::move(new_state_11.second), std::move(new_state_12.second), + std::move(new_state_13.second), std::move(new_state_14.second), + std::move(new_state_15.second), std::move(new_state_16.second)) + .set_name("concat"); + + graph << FullyConnectedLayer(2048U, get_weights_accessor(data_path, "h5_transpose.npy", weights_layout), + get_weights_accessor(data_path, "MatMul_3_bias.npy")) + .set_name("fc3") << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, cell_clip)) - .set_name("Relu3") - << FullyConnectedLayer( - 29U, - get_weights_accessor(data_path, "h6_transpose.npy", weights_layout), - get_weights_accessor(data_path, "MatMul_4_bias.npy")) - .set_name("fc3") + .set_name("Relu3") + << FullyConnectedLayer(29U, get_weights_accessor(data_path, "h6_transpose.npy", weights_layout), + get_weights_accessor(data_path, "MatMul_4_bias.npy")) + .set_name("fc3") << SoftmaxLayer().set_name("logits"); graph << OutputLayer(get_output_accessor(common_params, 5)); // Finalize graph GraphConfig config; - 
config.num_threads = common_params.threads; - config.use_tuner = common_params.enable_tuner; - config.tuner_file = common_params.tuner_file; - config.convert_to_uint8 = (common_params.data_type == DataType::QASYMM8); + config.num_threads = common_params.threads; + config.use_tuner = common_params.enable_tuner; + config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; + config.use_synthetic_type = arm_compute::is_data_type_quantized(common_params.data_type); + config.synthetic_type = common_params.data_type; graph.finalize(common_params.target, config); @@ -239,7 +241,7 @@ private: return Status{}; } - std::pair<SubStream, SubStream> add_lstm_cell(NodeID unstack_nid, + std::pair<SubStream, SubStream> add_lstm_cell(NodeID unstack_nid, unsigned int unstack_idx, SubStream previous_state_c, SubStream previous_state_h, @@ -248,41 +250,41 @@ private: SubStream lstm_fc_bias) { const std::string cell_name("rnn/lstm_cell_" + std::to_string(unstack_idx)); - const DataLayoutDimension concat_dim = (common_params.data_layout == DataLayout::NHWC) ? DataLayoutDimension::CHANNEL : DataLayoutDimension::WIDTH; + const DataLayoutDimension concat_dim = + (common_params.data_layout == DataLayout::NHWC) ? DataLayoutDimension::CHANNEL : DataLayoutDimension::WIDTH; // Concatenate result of Unstack with previous_state_h - NodeParams concat_params = { cell_name + "/concat", graph.hints().target_hint }; + NodeParams concat_params = {cell_name + "/concat", graph.hints().target_hint}; NodeID concat_nid = graph.graph().add_node<ConcatenateLayerNode>(2, concat_dim); graph.graph().add_connection(unstack_nid, unstack_idx, concat_nid, 0); graph.graph().add_connection(previous_state_h.tail_node(), 0, concat_nid, 1); set_node_params(graph.graph(), concat_nid, concat_params); graph.forward_tail(concat_nid); - graph << FullyConnectedLayer( - 8192U, - lstm_fc_weights, - lstm_fc_bias) - .set_name(cell_name + "/BiasAdd"); + graph << FullyConnectedLayer(8192U, lstm_fc_weights, lstm_fc_bias).set_name(cell_name + "/BiasAdd"); // Split Layer const unsigned int num_splits = 4; const unsigned int split_axis = 0; - NodeParams split_params = { cell_name + "/split", graph.hints().target_hint }; - NodeID split_nid = GraphBuilder::add_split_node(graph.graph(), split_params, { graph.tail_node(), 0 }, num_splits, split_axis); + NodeParams split_params = {cell_name + "/split", graph.hints().target_hint}; + NodeID split_nid = + GraphBuilder::add_split_node(graph.graph(), split_params, {graph.tail_node(), 0}, num_splits, split_axis); - NodeParams sigmoid_1_params = { cell_name + "/Sigmoid_1", graph.hints().target_hint }; - NodeParams add_params = { cell_name + "/add", graph.hints().target_hint }; - NodeParams sigmoid_2_params = { cell_name + "/Sigmoid_2", graph.hints().target_hint }; - NodeParams tanh_params = { cell_name + "/Tanh", graph.hints().target_hint }; + NodeParams sigmoid_1_params = {cell_name + "/Sigmoid_1", graph.hints().target_hint}; + NodeParams add_params = {cell_name + "/add", graph.hints().target_hint}; + NodeParams sigmoid_2_params = {cell_name + "/Sigmoid_2", graph.hints().target_hint}; + NodeParams tanh_params = {cell_name + "/Tanh", graph.hints().target_hint}; // Sigmoid 1 (first split) - NodeID sigmoid_1_nid = graph.graph().add_node<ActivationLayerNode>(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)); + NodeID sigmoid_1_nid = graph.graph().add_node<ActivationLayerNode>( + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)); 
graph.graph().add_connection(split_nid, 0, sigmoid_1_nid, 0); set_node_params(graph.graph(), sigmoid_1_nid, sigmoid_1_params); // Tanh (second split) - NodeID tanh_nid = graph.graph().add_node<ActivationLayerNode>(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f)); + NodeID tanh_nid = graph.graph().add_node<ActivationLayerNode>( + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f)); graph.graph().add_connection(split_nid, 1, tanh_nid, 0); set_node_params(graph.graph(), tanh_nid, tanh_params); @@ -290,13 +292,15 @@ private: tanh_ss.forward_tail(tanh_nid); // Add (third split) - NodeID add_nid = graph.graph().add_node<EltwiseLayerNode>(descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add }); + NodeID add_nid = + graph.graph().add_node<EltwiseLayerNode>(descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add}); graph.graph().add_connection(split_nid, 2, add_nid, 0); graph.graph().add_connection(add_y.tail_node(), 0, add_nid, 1); set_node_params(graph.graph(), add_nid, add_params); // Sigmoid 2 (fourth split) - NodeID sigmoid_2_nid = graph.graph().add_node<ActivationLayerNode>(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)); + NodeID sigmoid_2_nid = graph.graph().add_node<ActivationLayerNode>( + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)); graph.graph().add_connection(split_nid, 3, sigmoid_2_nid, 0); set_node_params(graph.graph(), sigmoid_2_nid, sigmoid_2_params); @@ -304,28 +308,28 @@ private: sigmoid_1_ss.forward_tail(sigmoid_1_nid); SubStream mul_1_ss(sigmoid_1_ss); mul_1_ss << EltwiseLayer(std::move(sigmoid_1_ss), std::move(tanh_ss), EltwiseOperation::Mul) - .set_name(cell_name + "/mul_1"); + .set_name(cell_name + "/mul_1"); SubStream tanh_1_ss_tmp(graph); tanh_1_ss_tmp.forward_tail(add_nid); tanh_1_ss_tmp << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC)) - .set_name(cell_name + "/Sigmoid"); + .set_name(cell_name + "/Sigmoid"); SubStream tanh_1_ss_tmp2(tanh_1_ss_tmp); tanh_1_ss_tmp2 << EltwiseLayer(std::move(tanh_1_ss_tmp), std::move(previous_state_c), EltwiseOperation::Mul) - .set_name(cell_name + "/mul"); + .set_name(cell_name + "/mul"); SubStream tanh_1_ss(tanh_1_ss_tmp2); tanh_1_ss << EltwiseLayer(std::move(tanh_1_ss_tmp2), std::move(mul_1_ss), EltwiseOperation::Add) - .set_name(cell_name + "/new_state_c"); + .set_name(cell_name + "/new_state_c"); SubStream new_state_c(tanh_1_ss); tanh_1_ss << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH, 1.f, 1.f)) - .set_name(cell_name + "/Tanh_1"); + .set_name(cell_name + "/Tanh_1"); SubStream sigmoid_2_ss(graph); sigmoid_2_ss.forward_tail(sigmoid_2_nid); graph << EltwiseLayer(std::move(sigmoid_2_ss), std::move(tanh_1_ss), EltwiseOperation::Mul) - .set_name(cell_name + "/new_state_h"); + .set_name(cell_name + "/new_state_h"); SubStream new_state_h(graph); return std::pair<SubStream, SubStream>(new_state_c, new_state_h); diff --git a/examples/graph_edsr.cpp b/examples/graph_edsr.cpp index 2f2a9fa4d7..b4f2fadf4a 100644 --- a/examples/graph_edsr.cpp +++ b/examples/graph_edsr.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 ARM Limited. + * Copyright (c) 2020-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,28 +22,28 @@ * SOFTWARE. 
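Before moving to the EDSR files, a note on what the reflowed add_lstm_cell() above computes: the 8192-wide fully connected output is split four ways, and the node wiring realizes a standard LSTM cell with a forget-gate bias of 1.0, supplied through the add_y input of ones. Writing the four splits as i, j, f, o in order, the cell is (an editorial summary in LaTeX, not text from the source):

\begin{aligned}
[\,i,\ j,\ f,\ o\,] &= \operatorname{split}_4\!\big(W\,[x_t;\ h_{t-1}] + b\big)\\
c_t &= \sigma(f + 1)\odot c_{t-1} + \sigma(i)\odot\tanh(j)\\
h_t &= \sigma(o)\odot\tanh(c_t)
\end{aligned}

Here \sigma is the logistic sigmoid and \odot is element-wise multiplication; new_state_c and new_state_h in the code are c_t and h_t, and the Add node fed by add_y implements the f + 1 term.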
*/ +#include "graph_edsr.h" + #include "arm_compute/graph/Utils.h" #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/Utils.h" -#include "graph_edsr.h" - using namespace arm_compute::graph; using namespace arm_compute::utils; class GraphEdsrExample : public Example { public: - GraphEdsrExample() - : cmd_parser(), common_opts(cmd_parser), common_params() + GraphEdsrExample() : cmd_parser(), common_opts(cmd_parser), common_params() { expected_output_filename = cmd_parser.add_option<SimpleOption<std::string>>("expected-output-filename", ""); - expected_output_filename->set_help("Name of npy file containing the expected output to validate the graph output."); + expected_output_filename->set_help( + "Name of npy file containing the expected output to validate the graph output."); } - GraphEdsrExample(const GraphEdsrExample &) = delete; + GraphEdsrExample(const GraphEdsrExample &) = delete; GraphEdsrExample &operator=(const GraphEdsrExample &) = delete; ~GraphEdsrExample() override = default; @@ -57,13 +57,14 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; } - ARM_COMPUTE_EXIT_ON_MSG(common_params.data_type != DataType::QASYMM8, "Only QASYMM8 is supported for this graph example"); + ARM_COMPUTE_EXIT_ON_MSG(common_params.data_type != DataType::QASYMM8, + "Only QASYMM8 is supported for this graph example"); // Print parameter values std::cout << common_params << std::endl; @@ -75,6 +76,7 @@ public: config.use_tuner = common_params.enable_tuner; config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; context.set_config(config); @@ -97,11 +99,32 @@ private: GraphContext context{}; GraphManager manager{}; - SimpleOption<std::string> *expected_output_filename{ nullptr }; + SimpleOption<std::string> *expected_output_filename{nullptr}; GraphEdsr model{}; }; +/** Internal implementation of UINT8 EDSR with some modifications from the paper. + * The sub-pixel convolution has been replaced with a deconvolution layer. This + * operation is mathematically the same. + * + * Convolution replaced by deconvolution: + * https://arxiv.org/abs/1609.07009 + * "Is the deconvolution layer the same as a convolutional layer?" + * Wenzhe Shi, Jose Caballero, Lucas Theis, Ferenc Huszar, Andrew Aitken, Christian Ledig, Zehan Wang + * + * Original model is: + * https://arxiv.org/abs/1707.02921 + * "Enhanced Deep Residual Networks for Single Image Super-Resolution" + * Bee Lim, Sanghyun Son, Heewon Kim, Seungjun Nah, Kyoung Mu Lee + * + * @note To list all the possible arguments execute the binary appended with the --help option + * + * @param[in] argc Number of arguments + * @param[in] argv Arguments + * + * @return Return code + */ int main(int argc, char **argv) { return run_example<GraphEdsrExample>(argc, argv); diff --git a/examples/graph_edsr.h b/examples/graph_edsr.h index cb467d0377..1161e4ba38 100644 --- a/examples/graph_edsr.h +++ b/examples/graph_edsr.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 ARM Limited. + * Copyright (c) 2020 Arm Limited. 
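The file-level comment added to graph_edsr.cpp above cites Shi et al. for why replacing the sub-pixel convolution with a deconvolution changes nothing mathematically. Stated informally (an editorial gloss; the kernel-size bookkeeping is in the paper): a convolution producing r^2 C output channels followed by a factor-r depth-to-space shuffle computes the same function as a stride-r transposed convolution with C output channels whose kernel is a fixed rearrangement \Pi of the same weights,

\operatorname{DepthToSpace}_r\big(\operatorname{Conv}_{W}(x)\big) = \operatorname{Deconv}^{\,\mathrm{stride}=r}_{\Pi(W)}(x),

which is why the UINT8 EDSR graph in graph_edsr.h below can use deconvolution nodes directly.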
* * SPDX-License-Identifier: MIT * @@ -32,12 +32,12 @@ class GraphEdsr { public: - GraphEdsr() - : _graph(0, "EDSR") + GraphEdsr() : _graph(0, "EDSR") { } - bool setup(const arm_compute::utils::CommonGraphParams &common_params, const arm_compute::utils::SimpleOption<std::string> &expected_output_filename) + bool setup(const arm_compute::utils::CommonGraphParams &common_params, + const arm_compute::utils::SimpleOption<std::string> &expected_output_filename) { using namespace arm_compute; using namespace arm_compute::graph; @@ -47,1225 +47,879 @@ public: const auto &data_path = common_params.data_path; const auto &target = common_params.target; - NodeID id_upscale_net_FakeQuantWithMinMaxVars_transposed = _graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 12, 2, 2, 3 }, - DataType::QASYMM8, - QuantizationInfo(0.00393533194437623, 1), - DataLayout::NHWC }); - INode *node_upscale_net_FakeQuantWithMinMaxVars_transposed = _graph.node(id_upscale_net_FakeQuantWithMinMaxVars_transposed); - node_upscale_net_FakeQuantWithMinMaxVars_transposed->set_common_node_parameters(NodeParams{ "upscale_net_FakeQuantWithMinMaxVars_transposed", target }); - node_upscale_net_FakeQuantWithMinMaxVars_transposed->output(0)->set_accessor(get_weights_accessor(data_path, - "/cnn_data/edsr_model/upscale_net_FakeQuantWithMinMaxVars_transposed.npy", DataLayout::NHWC)); - - NodeID id_pre_upscale_Conv2D_bias = _graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 12 }, - DataType::S32, - QuantizationInfo(2.9644968435604824e-06), - DataLayout::NHWC }); + NodeID id_upscale_net_FakeQuantWithMinMaxVars_transposed = _graph.add_node<ConstNode>(TensorDescriptor{ + TensorShape{12, 2, 2, 3}, DataType::QASYMM8, QuantizationInfo(0.00393533194437623, 1), DataLayout::NHWC}); + INode *node_upscale_net_FakeQuantWithMinMaxVars_transposed = + _graph.node(id_upscale_net_FakeQuantWithMinMaxVars_transposed); + node_upscale_net_FakeQuantWithMinMaxVars_transposed->set_common_node_parameters( + NodeParams{"upscale_net_FakeQuantWithMinMaxVars_transposed", target}); + node_upscale_net_FakeQuantWithMinMaxVars_transposed->output(0)->set_accessor(get_weights_accessor( + data_path, "/cnn_data/edsr_model/upscale_net_FakeQuantWithMinMaxVars_transposed.npy", DataLayout::NHWC)); + + NodeID id_pre_upscale_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{ + TensorShape{12}, DataType::S32, QuantizationInfo(2.9644968435604824e-06), DataLayout::NHWC}); INode *node_pre_upscale_Conv2D_bias = _graph.node(id_pre_upscale_Conv2D_bias); - node_pre_upscale_Conv2D_bias->set_common_node_parameters(NodeParams{ "pre_upscale_Conv2D_bias", target }); - node_pre_upscale_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/pre_upscale_Conv2D_bias.npy", DataLayout::NHWC)); - - NodeID id_pre_upscale_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 256, 3, 3, 12 }, - DataType::QASYMM8, - QuantizationInfo(0.000455576169770211, 128), - DataLayout::NHWC }); + node_pre_upscale_Conv2D_bias->set_common_node_parameters(NodeParams{"pre_upscale_Conv2D_bias", target}); + node_pre_upscale_Conv2D_bias->output(0)->set_accessor( + get_weights_accessor(data_path, "/cnn_data/edsr_model/pre_upscale_Conv2D_bias.npy", DataLayout::NHWC)); + + NodeID id_pre_upscale_FakeQuantWithMinMaxVars = + _graph.add_node<ConstNode>(TensorDescriptor{TensorShape{256, 3, 3, 12}, DataType::QASYMM8, + QuantizationInfo(0.000455576169770211, 128), DataLayout::NHWC}); INode 
*node_pre_upscale_FakeQuantWithMinMaxVars = _graph.node(id_pre_upscale_FakeQuantWithMinMaxVars); - node_pre_upscale_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "pre_upscale_FakeQuantWithMinMaxVars", target }); - node_pre_upscale_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/pre_upscale_FakeQuantWithMinMaxVars.npy", - DataLayout::NHWC)); - - NodeID id_post_residual_Conv2D_bias = _graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 256 }, - DataType::S32, - QuantizationInfo(1.2760000345224398e-06), - DataLayout::NHWC }); + node_pre_upscale_FakeQuantWithMinMaxVars->set_common_node_parameters( + NodeParams{"pre_upscale_FakeQuantWithMinMaxVars", target}); + node_pre_upscale_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor( + data_path, "/cnn_data/edsr_model/pre_upscale_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC)); + + NodeID id_post_residual_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{ + TensorShape{256}, DataType::S32, QuantizationInfo(1.2760000345224398e-06), DataLayout::NHWC}); INode *node_post_residual_Conv2D_bias = _graph.node(id_post_residual_Conv2D_bias); - node_post_residual_Conv2D_bias->set_common_node_parameters(NodeParams{ "post_residual_Conv2D_bias", target }); - node_post_residual_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/post_residual_Conv2D_bias.npy", DataLayout::NHWC)); + node_post_residual_Conv2D_bias->set_common_node_parameters(NodeParams{"post_residual_Conv2D_bias", target}); + node_post_residual_Conv2D_bias->output(0)->set_accessor( + get_weights_accessor(data_path, "/cnn_data/edsr_model/post_residual_Conv2D_bias.npy", DataLayout::NHWC)); NodeID id_post_residual_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 256, 3, 3, 256 }, - DataType::QASYMM8, - QuantizationInfo(0.00036424631252884865, 129), - DataLayout::NHWC }); + TensorDescriptor{TensorShape{256, 3, 3, 256}, DataType::QASYMM8, + QuantizationInfo(0.00036424631252884865, 129), DataLayout::NHWC}); INode *node_post_residual_FakeQuantWithMinMaxVars = _graph.node(id_post_residual_FakeQuantWithMinMaxVars); - node_post_residual_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "post_residual_FakeQuantWithMinMaxVars", target }); - node_post_residual_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/post_residual_FakeQuantWithMinMaxVars.npy", - DataLayout::NHWC)); - - TensorShape scalar_4d_shape{}; + node_post_residual_FakeQuantWithMinMaxVars->set_common_node_parameters( + NodeParams{"post_residual_FakeQuantWithMinMaxVars", target}); + node_post_residual_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor( + data_path, "/cnn_data/edsr_model/post_residual_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC)); - scalar_4d_shape.set(0, 1, false).set(1, 1, false).set(2, 1, false).set(3, 1, false); - - NodeID id_mul_15_y = _graph.add_node<ConstNode>( - TensorDescriptor - { - scalar_4d_shape, - DataType::QASYMM8, - QuantizationInfo(0.0003921568568330258), - DataLayout::NHWC }); + NodeID id_mul_15_y = _graph.add_node<ConstNode>(TensorDescriptor{ + TensorShape{1}, DataType::QASYMM8, QuantizationInfo(0.0003921568568330258), DataLayout::NHWC}); INode *node_mul_15_y = _graph.node(id_mul_15_y); - node_mul_15_y->set_common_node_parameters(NodeParams{ "mul_15_y", target }); - node_mul_15_y->output(0)->set_accessor(get_weights_accessor(data_path, 
"/cnn_data/edsr_model/mul_15_y.npy", DataLayout::NHWC)); - - NodeID id_block_15_1_Conv2D_bias = _graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 256 }, - DataType::S32, - QuantizationInfo(1.2441644230420934e-06), - DataLayout::NHWC }); + node_mul_15_y->set_common_node_parameters(NodeParams{"mul_15_y", target}); + node_mul_15_y->output(0)->set_accessor( + get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_15_y.npy", DataLayout::NHWC)); + + NodeID id_block_15_1_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{ + TensorShape{256}, DataType::S32, QuantizationInfo(1.2441644230420934e-06), DataLayout::NHWC}); INode *node_block_15_1_Conv2D_bias = _graph.node(id_block_15_1_Conv2D_bias); - node_block_15_1_Conv2D_bias->set_common_node_parameters(NodeParams{ "block_15_1_Conv2D_bias", target }); - node_block_15_1_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_15_1_Conv2D_bias.npy", DataLayout::NHWC)); + node_block_15_1_Conv2D_bias->set_common_node_parameters(NodeParams{"block_15_1_Conv2D_bias", target}); + node_block_15_1_Conv2D_bias->output(0)->set_accessor( + get_weights_accessor(data_path, "/cnn_data/edsr_model/block_15_1_Conv2D_bias.npy", DataLayout::NHWC)); NodeID id_block_15_1_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 256, 3, 3, 256 }, - DataType::QASYMM8, - QuantizationInfo(0.00037038681330159307, 125), - DataLayout::NHWC }); + TensorDescriptor{TensorShape{256, 3, 3, 256}, DataType::QASYMM8, + QuantizationInfo(0.00037038681330159307, 125), DataLayout::NHWC}); INode *node_block_15_1_FakeQuantWithMinMaxVars = _graph.node(id_block_15_1_FakeQuantWithMinMaxVars); - node_block_15_1_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "block_15_1_FakeQuantWithMinMaxVars", target }); - node_block_15_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_15_1_FakeQuantWithMinMaxVars.npy", - DataLayout::NHWC)); - - NodeID id_mul_14_y = _graph.add_node<ConstNode>( - TensorDescriptor - { - scalar_4d_shape, - DataType::QASYMM8, - QuantizationInfo(0.0003921568568330258), - DataLayout::NHWC }); + node_block_15_1_FakeQuantWithMinMaxVars->set_common_node_parameters( + NodeParams{"block_15_1_FakeQuantWithMinMaxVars", target}); + node_block_15_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor( + data_path, "/cnn_data/edsr_model/block_15_1_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC)); + + NodeID id_mul_14_y = _graph.add_node<ConstNode>(TensorDescriptor{ + TensorShape{1}, DataType::QASYMM8, QuantizationInfo(0.0003921568568330258), DataLayout::NHWC}); INode *node_mul_14_y = _graph.node(id_mul_14_y); - node_mul_14_y->set_common_node_parameters(NodeParams{ "mul_14_y", target }); - node_mul_14_y->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_14_y.npy", DataLayout::NHWC)); - - NodeID id_block_14_1_Conv2D_bias = _graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 256 }, - DataType::S32, - QuantizationInfo(1.3417260333881131e-06), - DataLayout::NHWC }); + node_mul_14_y->set_common_node_parameters(NodeParams{"mul_14_y", target}); + node_mul_14_y->output(0)->set_accessor( + get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_14_y.npy", DataLayout::NHWC)); + + NodeID id_block_14_1_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{ + TensorShape{256}, DataType::S32, QuantizationInfo(1.3417260333881131e-06), DataLayout::NHWC}); INode 
*node_block_14_1_Conv2D_bias = _graph.node(id_block_14_1_Conv2D_bias); - node_block_14_1_Conv2D_bias->set_common_node_parameters(NodeParams{ "block_14_1_Conv2D_bias", target }); - node_block_14_1_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_14_1_Conv2D_bias.npy", DataLayout::NHWC)); + node_block_14_1_Conv2D_bias->set_common_node_parameters(NodeParams{"block_14_1_Conv2D_bias", target}); + node_block_14_1_Conv2D_bias->output(0)->set_accessor( + get_weights_accessor(data_path, "/cnn_data/edsr_model/block_14_1_Conv2D_bias.npy", DataLayout::NHWC)); NodeID id_block_14_1_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 256, 3, 3, 256 }, - DataType::QASYMM8, - QuantizationInfo(0.00040307495510205626, 127), - DataLayout::NHWC }); + TensorDescriptor{TensorShape{256, 3, 3, 256}, DataType::QASYMM8, + QuantizationInfo(0.00040307495510205626, 127), DataLayout::NHWC}); INode *node_block_14_1_FakeQuantWithMinMaxVars = _graph.node(id_block_14_1_FakeQuantWithMinMaxVars); - node_block_14_1_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "block_14_1_FakeQuantWithMinMaxVars", target }); - node_block_14_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_14_1_FakeQuantWithMinMaxVars.npy", - DataLayout::NHWC)); - - NodeID id_mul_13_y = _graph.add_node<ConstNode>( - TensorDescriptor - { - scalar_4d_shape, - DataType::QASYMM8, - QuantizationInfo(0.0003921568568330258), - DataLayout::NHWC }); + node_block_14_1_FakeQuantWithMinMaxVars->set_common_node_parameters( + NodeParams{"block_14_1_FakeQuantWithMinMaxVars", target}); + node_block_14_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor( + data_path, "/cnn_data/edsr_model/block_14_1_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC)); + + NodeID id_mul_13_y = _graph.add_node<ConstNode>(TensorDescriptor{ + TensorShape{1}, DataType::QASYMM8, QuantizationInfo(0.0003921568568330258), DataLayout::NHWC}); INode *node_mul_13_y = _graph.node(id_mul_13_y); - node_mul_13_y->set_common_node_parameters(NodeParams{ "mul_13_y", target }); - node_mul_13_y->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_13_y.npy", DataLayout::NHWC)); - - NodeID id_block_13_1_Conv2D_bias = _graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 256 }, - DataType::S32, - QuantizationInfo(1.2636977544389083e-06), - DataLayout::NHWC }); + node_mul_13_y->set_common_node_parameters(NodeParams{"mul_13_y", target}); + node_mul_13_y->output(0)->set_accessor( + get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_13_y.npy", DataLayout::NHWC)); + + NodeID id_block_13_1_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{ + TensorShape{256}, DataType::S32, QuantizationInfo(1.2636977544389083e-06), DataLayout::NHWC}); INode *node_block_13_1_Conv2D_bias = _graph.node(id_block_13_1_Conv2D_bias); - node_block_13_1_Conv2D_bias->set_common_node_parameters(NodeParams{ "block_13_1_Conv2D_bias", target }); - node_block_13_1_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_13_1_Conv2D_bias.npy", DataLayout::NHWC)); + node_block_13_1_Conv2D_bias->set_common_node_parameters(NodeParams{"block_13_1_Conv2D_bias", target}); + node_block_13_1_Conv2D_bias->output(0)->set_accessor( + get_weights_accessor(data_path, "/cnn_data/edsr_model/block_13_1_Conv2D_bias.npy", DataLayout::NHWC)); NodeID id_block_13_1_FakeQuantWithMinMaxVars = 
_graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 256, 3, 3, 256 }, - DataType::QASYMM8, - QuantizationInfo(0.0003858553245663643, 131), - DataLayout::NHWC }); + TensorDescriptor{TensorShape{256, 3, 3, 256}, DataType::QASYMM8, + QuantizationInfo(0.0003858553245663643, 131), DataLayout::NHWC}); INode *node_block_13_1_FakeQuantWithMinMaxVars = _graph.node(id_block_13_1_FakeQuantWithMinMaxVars); - node_block_13_1_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "block_13_1_FakeQuantWithMinMaxVars", target }); - node_block_13_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_13_1_FakeQuantWithMinMaxVars.npy", - DataLayout::NHWC)); - - NodeID id_mul_12_y = _graph.add_node<ConstNode>( - TensorDescriptor - { - scalar_4d_shape, - DataType::QASYMM8, - QuantizationInfo(0.0003921568568330258), - DataLayout::NHWC }); + node_block_13_1_FakeQuantWithMinMaxVars->set_common_node_parameters( + NodeParams{"block_13_1_FakeQuantWithMinMaxVars", target}); + node_block_13_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor( + data_path, "/cnn_data/edsr_model/block_13_1_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC)); + + NodeID id_mul_12_y = _graph.add_node<ConstNode>(TensorDescriptor{ + TensorShape{1}, DataType::QASYMM8, QuantizationInfo(0.0003921568568330258), DataLayout::NHWC}); INode *node_mul_12_y = _graph.node(id_mul_12_y); - node_mul_12_y->set_common_node_parameters(NodeParams{ "mul_12_y", target }); - node_mul_12_y->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_12_y.npy", DataLayout::NHWC)); - - NodeID id_block_12_1_Conv2D_bias = _graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 256 }, - DataType::S32, - QuantizationInfo(1.3479783547154511e-06), - DataLayout::NHWC }); + node_mul_12_y->set_common_node_parameters(NodeParams{"mul_12_y", target}); + node_mul_12_y->output(0)->set_accessor( + get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_12_y.npy", DataLayout::NHWC)); + + NodeID id_block_12_1_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{ + TensorShape{256}, DataType::S32, QuantizationInfo(1.3479783547154511e-06), DataLayout::NHWC}); INode *node_block_12_1_Conv2D_bias = _graph.node(id_block_12_1_Conv2D_bias); - node_block_12_1_Conv2D_bias->set_common_node_parameters(NodeParams{ "block_12_1_Conv2D_bias", target }); - node_block_12_1_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_12_1_Conv2D_bias.npy", DataLayout::NHWC)); + node_block_12_1_Conv2D_bias->set_common_node_parameters(NodeParams{"block_12_1_Conv2D_bias", target}); + node_block_12_1_Conv2D_bias->output(0)->set_accessor( + get_weights_accessor(data_path, "/cnn_data/edsr_model/block_12_1_Conv2D_bias.npy", DataLayout::NHWC)); NodeID id_block_12_1_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 256, 3, 3, 256 }, - DataType::QASYMM8, - QuantizationInfo(0.00041212860378436744, 130), - DataLayout::NHWC }); + TensorDescriptor{TensorShape{256, 3, 3, 256}, DataType::QASYMM8, + QuantizationInfo(0.00041212860378436744, 130), DataLayout::NHWC}); INode *node_block_12_1_FakeQuantWithMinMaxVars = _graph.node(id_block_12_1_FakeQuantWithMinMaxVars); - node_block_12_1_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "block_12_1_FakeQuantWithMinMaxVars", target }); - node_block_12_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, 
"/cnn_data/edsr_model/block_12_1_FakeQuantWithMinMaxVars.npy", - DataLayout::NHWC)); - - NodeID id_mul_11_y = _graph.add_node<ConstNode>( - TensorDescriptor - { - scalar_4d_shape, - DataType::QASYMM8, - QuantizationInfo(0.0003921568568330258), - DataLayout::NHWC }); + node_block_12_1_FakeQuantWithMinMaxVars->set_common_node_parameters( + NodeParams{"block_12_1_FakeQuantWithMinMaxVars", target}); + node_block_12_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor( + data_path, "/cnn_data/edsr_model/block_12_1_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC)); + + NodeID id_mul_11_y = _graph.add_node<ConstNode>(TensorDescriptor{ + TensorShape{1}, DataType::QASYMM8, QuantizationInfo(0.0003921568568330258), DataLayout::NHWC}); INode *node_mul_11_y = _graph.node(id_mul_11_y); - node_mul_11_y->set_common_node_parameters(NodeParams{ "mul_11_y", target }); - node_mul_11_y->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_11_y.npy", DataLayout::NHWC)); - - NodeID id_block_11_1_Conv2D_bias = _graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 256 }, - DataType::S32, - QuantizationInfo(1.2847248171965475e-06), - DataLayout::NHWC }); + node_mul_11_y->set_common_node_parameters(NodeParams{"mul_11_y", target}); + node_mul_11_y->output(0)->set_accessor( + get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_11_y.npy", DataLayout::NHWC)); + + NodeID id_block_11_1_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{ + TensorShape{256}, DataType::S32, QuantizationInfo(1.2847248171965475e-06), DataLayout::NHWC}); INode *node_block_11_1_Conv2D_bias = _graph.node(id_block_11_1_Conv2D_bias); - node_block_11_1_Conv2D_bias->set_common_node_parameters(NodeParams{ "block_11_1_Conv2D_bias", target }); - node_block_11_1_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_11_1_Conv2D_bias.npy", DataLayout::NHWC)); + node_block_11_1_Conv2D_bias->set_common_node_parameters(NodeParams{"block_11_1_Conv2D_bias", target}); + node_block_11_1_Conv2D_bias->output(0)->set_accessor( + get_weights_accessor(data_path, "/cnn_data/edsr_model/block_11_1_Conv2D_bias.npy", DataLayout::NHWC)); NodeID id_block_11_1_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 256, 3, 3, 256 }, - DataType::QASYMM8, - QuantizationInfo(0.00040296532097272575, 131), - DataLayout::NHWC }); + TensorDescriptor{TensorShape{256, 3, 3, 256}, DataType::QASYMM8, + QuantizationInfo(0.00040296532097272575, 131), DataLayout::NHWC}); INode *node_block_11_1_FakeQuantWithMinMaxVars = _graph.node(id_block_11_1_FakeQuantWithMinMaxVars); - node_block_11_1_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "block_11_1_FakeQuantWithMinMaxVars", target }); - node_block_11_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_11_1_FakeQuantWithMinMaxVars.npy", - DataLayout::NHWC)); - - NodeID id_mul_10_y = _graph.add_node<ConstNode>( - TensorDescriptor - { - scalar_4d_shape, - DataType::QASYMM8, - QuantizationInfo(0.0003921568568330258), - DataLayout::NHWC }); + node_block_11_1_FakeQuantWithMinMaxVars->set_common_node_parameters( + NodeParams{"block_11_1_FakeQuantWithMinMaxVars", target}); + node_block_11_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor( + data_path, "/cnn_data/edsr_model/block_11_1_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC)); + + NodeID id_mul_10_y = _graph.add_node<ConstNode>(TensorDescriptor{ + 
TensorShape{1}, DataType::QASYMM8, QuantizationInfo(0.0003921568568330258), DataLayout::NHWC}); INode *node_mul_10_y = _graph.node(id_mul_10_y); - node_mul_10_y->set_common_node_parameters(NodeParams{ "mul_10_y", target }); - node_mul_10_y->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_10_y.npy", DataLayout::NHWC)); - - NodeID id_block_10_1_Conv2D_bias = _graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 256 }, - DataType::S32, - QuantizationInfo(1.1997129831797793e-06), - DataLayout::NHWC }); + node_mul_10_y->set_common_node_parameters(NodeParams{"mul_10_y", target}); + node_mul_10_y->output(0)->set_accessor( + get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_10_y.npy", DataLayout::NHWC)); + + NodeID id_block_10_1_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{ + TensorShape{256}, DataType::S32, QuantizationInfo(1.1997129831797793e-06), DataLayout::NHWC}); INode *node_block_10_1_Conv2D_bias = _graph.node(id_block_10_1_Conv2D_bias); - node_block_10_1_Conv2D_bias->set_common_node_parameters(NodeParams{ "block_10_1_Conv2D_bias", target }); - node_block_10_1_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_10_1_Conv2D_bias.npy", DataLayout::NHWC)); + node_block_10_1_Conv2D_bias->set_common_node_parameters(NodeParams{"block_10_1_Conv2D_bias", target}); + node_block_10_1_Conv2D_bias->output(0)->set_accessor( + get_weights_accessor(data_path, "/cnn_data/edsr_model/block_10_1_Conv2D_bias.npy", DataLayout::NHWC)); NodeID id_block_10_1_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 256, 3, 3, 256 }, - DataType::QASYMM8, - QuantizationInfo(0.00036640543839894235, 129), - DataLayout::NHWC }); + TensorDescriptor{TensorShape{256, 3, 3, 256}, DataType::QASYMM8, + QuantizationInfo(0.00036640543839894235, 129), DataLayout::NHWC}); INode *node_block_10_1_FakeQuantWithMinMaxVars = _graph.node(id_block_10_1_FakeQuantWithMinMaxVars); - node_block_10_1_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "block_10_1_FakeQuantWithMinMaxVars", target }); - node_block_10_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_10_1_FakeQuantWithMinMaxVars.npy", - DataLayout::NHWC)); - - NodeID id_mul_9_y = _graph.add_node<ConstNode>( - TensorDescriptor - { - scalar_4d_shape, - DataType::QASYMM8, - QuantizationInfo(0.0003921568568330258), - DataLayout::NHWC }); + node_block_10_1_FakeQuantWithMinMaxVars->set_common_node_parameters( + NodeParams{"block_10_1_FakeQuantWithMinMaxVars", target}); + node_block_10_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor( + data_path, "/cnn_data/edsr_model/block_10_1_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC)); + + NodeID id_mul_9_y = _graph.add_node<ConstNode>(TensorDescriptor{ + TensorShape{1}, DataType::QASYMM8, QuantizationInfo(0.0003921568568330258), DataLayout::NHWC}); INode *node_mul_9_y = _graph.node(id_mul_9_y); - node_mul_9_y->set_common_node_parameters(NodeParams{ "mul_9_y", target }); - node_mul_9_y->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_9_y.npy", DataLayout::NHWC)); - - NodeID id_block_9_1_Conv2D_bias = _graph.add_node<ConstNode>( - TensorDescriptor - { - TensorShape{ 256 }, - DataType::S32, - QuantizationInfo(1.1920226370421005e-06), - DataLayout::NHWC }); + node_mul_9_y->set_common_node_parameters(NodeParams{"mul_9_y", target}); + node_mul_9_y->output(0)->set_accessor( + 
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_9_y.npy", DataLayout::NHWC));
+
+    NodeID id_block_9_1_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{256}, DataType::S32, QuantizationInfo(1.1920226370421005e-06), DataLayout::NHWC});
     INode *node_block_9_1_Conv2D_bias = _graph.node(id_block_9_1_Conv2D_bias);
-    node_block_9_1_Conv2D_bias->set_common_node_parameters(NodeParams{ "block_9_1_Conv2D_bias", target });
-    node_block_9_1_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_9_1_Conv2D_bias.npy", DataLayout::NHWC));
+    node_block_9_1_Conv2D_bias->set_common_node_parameters(NodeParams{"block_9_1_Conv2D_bias", target});
+    node_block_9_1_Conv2D_bias->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/block_9_1_Conv2D_bias.npy", DataLayout::NHWC));
 
     NodeID id_block_9_1_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256, 3, 3, 256 },
-            DataType::QASYMM8,
-            QuantizationInfo(0.0003706997958943248, 129),
-            DataLayout::NHWC });
+        TensorDescriptor{TensorShape{256, 3, 3, 256}, DataType::QASYMM8,
+                         QuantizationInfo(0.0003706997958943248, 129), DataLayout::NHWC});
     INode *node_block_9_1_FakeQuantWithMinMaxVars = _graph.node(id_block_9_1_FakeQuantWithMinMaxVars);
-    node_block_9_1_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "block_9_1_FakeQuantWithMinMaxVars", target });
-    node_block_9_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_9_1_FakeQuantWithMinMaxVars.npy",
-                                                                                         DataLayout::NHWC));
-
-    NodeID id_mul_8_y = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            scalar_4d_shape,
-            DataType::QASYMM8,
-            QuantizationInfo(0.0003921568568330258),
-            DataLayout::NHWC });
+    node_block_9_1_FakeQuantWithMinMaxVars->set_common_node_parameters(
+        NodeParams{"block_9_1_FakeQuantWithMinMaxVars", target});
+    node_block_9_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(
+        data_path, "/cnn_data/edsr_model/block_9_1_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC));
+
+    NodeID id_mul_8_y = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{1}, DataType::QASYMM8, QuantizationInfo(0.0003921568568330258), DataLayout::NHWC});
     INode *node_mul_8_y = _graph.node(id_mul_8_y);
-    node_mul_8_y->set_common_node_parameters(NodeParams{ "mul_8_y", target });
-    node_mul_8_y->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_8_y.npy", DataLayout::NHWC));
-
-    NodeID id_block_8_1_Conv2D_bias = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256 },
-            DataType::S32,
-            QuantizationInfo(1.218903321387188e-06),
-            DataLayout::NHWC });
+    node_mul_8_y->set_common_node_parameters(NodeParams{"mul_8_y", target});
+    node_mul_8_y->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_8_y.npy", DataLayout::NHWC));
+
+    NodeID id_block_8_1_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{256}, DataType::S32, QuantizationInfo(1.218903321387188e-06), DataLayout::NHWC});
     INode *node_block_8_1_Conv2D_bias = _graph.node(id_block_8_1_Conv2D_bias);
-    node_block_8_1_Conv2D_bias->set_common_node_parameters(NodeParams{ "block_8_1_Conv2D_bias", target });
-    node_block_8_1_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_8_1_Conv2D_bias.npy", DataLayout::NHWC));
+    node_block_8_1_Conv2D_bias->set_common_node_parameters(NodeParams{"block_8_1_Conv2D_bias", target});
+    node_block_8_1_Conv2D_bias->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/block_8_1_Conv2D_bias.npy", DataLayout::NHWC));
 
     NodeID id_block_8_1_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256, 3, 3, 256 },
-            DataType::QASYMM8,
-            QuantizationInfo(0.00038377835880964994, 127),
-            DataLayout::NHWC });
+        TensorDescriptor{TensorShape{256, 3, 3, 256}, DataType::QASYMM8,
+                         QuantizationInfo(0.00038377835880964994, 127), DataLayout::NHWC});
     INode *node_block_8_1_FakeQuantWithMinMaxVars = _graph.node(id_block_8_1_FakeQuantWithMinMaxVars);
-    node_block_8_1_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "block_8_1_FakeQuantWithMinMaxVars", target });
-    node_block_8_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_8_1_FakeQuantWithMinMaxVars.npy",
-                                                                                         DataLayout::NHWC));
-
-    NodeID id_mul_7_y = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            scalar_4d_shape,
-            DataType::QASYMM8,
-            QuantizationInfo(0.0003921568568330258),
-            DataLayout::NHWC });
+    node_block_8_1_FakeQuantWithMinMaxVars->set_common_node_parameters(
+        NodeParams{"block_8_1_FakeQuantWithMinMaxVars", target});
+    node_block_8_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(
+        data_path, "/cnn_data/edsr_model/block_8_1_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC));
+
+    NodeID id_mul_7_y = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{1}, DataType::QASYMM8, QuantizationInfo(0.0003921568568330258), DataLayout::NHWC});
     INode *node_mul_7_y = _graph.node(id_mul_7_y);
-    node_mul_7_y->set_common_node_parameters(NodeParams{ "mul_7_y", target });
-    node_mul_7_y->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_7_y.npy", DataLayout::NHWC));
-
-    NodeID id_block_7_1_Conv2D_bias = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256 },
-            DataType::S32,
-            QuantizationInfo(1.257252392861119e-06),
-            DataLayout::NHWC });
+    node_mul_7_y->set_common_node_parameters(NodeParams{"mul_7_y", target});
+    node_mul_7_y->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_7_y.npy", DataLayout::NHWC));
+
+    NodeID id_block_7_1_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{256}, DataType::S32, QuantizationInfo(1.257252392861119e-06), DataLayout::NHWC});
     INode *node_block_7_1_Conv2D_bias = _graph.node(id_block_7_1_Conv2D_bias);
-    node_block_7_1_Conv2D_bias->set_common_node_parameters(NodeParams{ "block_7_1_Conv2D_bias", target });
-    node_block_7_1_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_7_1_Conv2D_bias.npy", DataLayout::NHWC));
+    node_block_7_1_Conv2D_bias->set_common_node_parameters(NodeParams{"block_7_1_Conv2D_bias", target});
+    node_block_7_1_Conv2D_bias->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/block_7_1_Conv2D_bias.npy", DataLayout::NHWC));
 
     NodeID id_block_7_1_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256, 3, 3, 256 },
-            DataType::QASYMM8,
-            QuantizationInfo(0.00039844686398282647, 129),
-            DataLayout::NHWC });
+        TensorDescriptor{TensorShape{256, 3, 3, 256}, DataType::QASYMM8,
+                         QuantizationInfo(0.00039844686398282647, 129), DataLayout::NHWC});
     INode *node_block_7_1_FakeQuantWithMinMaxVars = _graph.node(id_block_7_1_FakeQuantWithMinMaxVars);
-    node_block_7_1_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "block_7_1_FakeQuantWithMinMaxVars", target });
-    node_block_7_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_7_1_FakeQuantWithMinMaxVars.npy",
-                                                                                         DataLayout::NHWC));
-
-    NodeID id_mul_6_y = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            scalar_4d_shape,
-            DataType::QASYMM8,
-            QuantizationInfo(0.0003921568568330258),
-            DataLayout::NHWC });
+    node_block_7_1_FakeQuantWithMinMaxVars->set_common_node_parameters(
+        NodeParams{"block_7_1_FakeQuantWithMinMaxVars", target});
+    node_block_7_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(
+        data_path, "/cnn_data/edsr_model/block_7_1_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC));
+
+    NodeID id_mul_6_y = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{1}, DataType::QASYMM8, QuantizationInfo(0.0003921568568330258), DataLayout::NHWC});
     INode *node_mul_6_y = _graph.node(id_mul_6_y);
-    node_mul_6_y->set_common_node_parameters(NodeParams{ "mul_6_y", target });
-    node_mul_6_y->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_6_y.npy", DataLayout::NHWC));
-
-    NodeID id_block_6_1_Conv2D_bias = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256 },
-            DataType::S32,
-            QuantizationInfo(1.244850636794581e-06),
-            DataLayout::NHWC });
+    node_mul_6_y->set_common_node_parameters(NodeParams{"mul_6_y", target});
+    node_mul_6_y->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_6_y.npy", DataLayout::NHWC));
+
+    NodeID id_block_6_1_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{256}, DataType::S32, QuantizationInfo(1.244850636794581e-06), DataLayout::NHWC});
     INode *node_block_6_1_Conv2D_bias = _graph.node(id_block_6_1_Conv2D_bias);
-    node_block_6_1_Conv2D_bias->set_common_node_parameters(NodeParams{ "block_6_1_Conv2D_bias", target });
-    node_block_6_1_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_6_1_Conv2D_bias.npy", DataLayout::NHWC));
+    node_block_6_1_Conv2D_bias->set_common_node_parameters(NodeParams{"block_6_1_Conv2D_bias", target});
+    node_block_6_1_Conv2D_bias->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/block_6_1_Conv2D_bias.npy", DataLayout::NHWC));
 
     NodeID id_block_6_1_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256, 3, 3, 256 },
-            DataType::QASYMM8,
-            QuantizationInfo(0.00040187727427110076, 132),
-            DataLayout::NHWC });
+        TensorDescriptor{TensorShape{256, 3, 3, 256}, DataType::QASYMM8,
+                         QuantizationInfo(0.00040187727427110076, 132), DataLayout::NHWC});
     INode *node_block_6_1_FakeQuantWithMinMaxVars = _graph.node(id_block_6_1_FakeQuantWithMinMaxVars);
-    node_block_6_1_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "block_6_1_FakeQuantWithMinMaxVars", target });
-    node_block_6_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_6_1_FakeQuantWithMinMaxVars.npy",
-                                                                                         DataLayout::NHWC));
-
-    NodeID id_mul_5_y = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            scalar_4d_shape,
-            DataType::QASYMM8,
-            QuantizationInfo(0.0003921568568330258),
-            DataLayout::NHWC });
+    node_block_6_1_FakeQuantWithMinMaxVars->set_common_node_parameters(
+        NodeParams{"block_6_1_FakeQuantWithMinMaxVars", target});
+    node_block_6_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(
+        data_path, "/cnn_data/edsr_model/block_6_1_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC));
+
+    NodeID id_mul_5_y = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{1}, DataType::QASYMM8, QuantizationInfo(0.0003921568568330258), DataLayout::NHWC});
     INode *node_mul_5_y = _graph.node(id_mul_5_y);
-    node_mul_5_y->set_common_node_parameters(NodeParams{ "mul_5_y", target });
-    node_mul_5_y->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_5_y.npy", DataLayout::NHWC));
-
-    NodeID id_block_5_1_Conv2D_bias = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256 },
-            DataType::S32,
-            QuantizationInfo(1.241092718373693e-06),
-            DataLayout::NHWC });
+    node_mul_5_y->set_common_node_parameters(NodeParams{"mul_5_y", target});
+    node_mul_5_y->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_5_y.npy", DataLayout::NHWC));
+
+    NodeID id_block_5_1_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{256}, DataType::S32, QuantizationInfo(1.241092718373693e-06), DataLayout::NHWC});
     INode *node_block_5_1_Conv2D_bias = _graph.node(id_block_5_1_Conv2D_bias);
-    node_block_5_1_Conv2D_bias->set_common_node_parameters(NodeParams{ "block_5_1_Conv2D_bias", target });
-    node_block_5_1_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_5_1_Conv2D_bias.npy", DataLayout::NHWC));
+    node_block_5_1_Conv2D_bias->set_common_node_parameters(NodeParams{"block_5_1_Conv2D_bias", target});
+    node_block_5_1_Conv2D_bias->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/block_5_1_Conv2D_bias.npy", DataLayout::NHWC));
 
     NodeID id_block_5_1_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256, 3, 3, 256 },
-            DataType::QASYMM8,
-            QuantizationInfo(0.0003938926674891263, 129),
-            DataLayout::NHWC });
+        TensorDescriptor{TensorShape{256, 3, 3, 256}, DataType::QASYMM8,
+                         QuantizationInfo(0.0003938926674891263, 129), DataLayout::NHWC});
     INode *node_block_5_1_FakeQuantWithMinMaxVars = _graph.node(id_block_5_1_FakeQuantWithMinMaxVars);
-    node_block_5_1_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "block_5_1_FakeQuantWithMinMaxVars", target });
-    node_block_5_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_5_1_FakeQuantWithMinMaxVars.npy",
-                                                                                         DataLayout::NHWC));
-
-    NodeID id_mul_4_y = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            scalar_4d_shape,
-            DataType::QASYMM8,
-            QuantizationInfo(0.0003921568568330258),
-            DataLayout::NHWC });
+    node_block_5_1_FakeQuantWithMinMaxVars->set_common_node_parameters(
+        NodeParams{"block_5_1_FakeQuantWithMinMaxVars", target});
+    node_block_5_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(
+        data_path, "/cnn_data/edsr_model/block_5_1_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC));
+
+    NodeID id_mul_4_y = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{1}, DataType::QASYMM8, QuantizationInfo(0.0003921568568330258), DataLayout::NHWC});
     INode *node_mul_4_y = _graph.node(id_mul_4_y);
-    node_mul_4_y->set_common_node_parameters(NodeParams{ "mul_4_y", target });
-    node_mul_4_y->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_4_y.npy", DataLayout::NHWC));
-
-    NodeID id_block_4_1_Conv2D_bias = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256 },
-            DataType::S32,
-            QuantizationInfo(1.1748390988941537e-06),
-            DataLayout::NHWC });
+    node_mul_4_y->set_common_node_parameters(NodeParams{"mul_4_y", target});
+    node_mul_4_y->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_4_y.npy", DataLayout::NHWC));
+
+    NodeID id_block_4_1_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{256}, DataType::S32, QuantizationInfo(1.1748390988941537e-06), DataLayout::NHWC});
     INode *node_block_4_1_Conv2D_bias = _graph.node(id_block_4_1_Conv2D_bias);
-    node_block_4_1_Conv2D_bias->set_common_node_parameters(NodeParams{ "block_4_1_Conv2D_bias", target });
-    node_block_4_1_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_4_1_Conv2D_bias.npy", DataLayout::NHWC));
+    node_block_4_1_Conv2D_bias->set_common_node_parameters(NodeParams{"block_4_1_Conv2D_bias", target});
+    node_block_4_1_Conv2D_bias->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/block_4_1_Conv2D_bias.npy", DataLayout::NHWC));
 
     NodeID id_block_4_1_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256, 3, 3, 256 },
-            DataType::QASYMM8,
-            QuantizationInfo(0.0003788181929849088, 129),
-            DataLayout::NHWC });
+        TensorDescriptor{TensorShape{256, 3, 3, 256}, DataType::QASYMM8,
+                         QuantizationInfo(0.0003788181929849088, 129), DataLayout::NHWC});
     INode *node_block_4_1_FakeQuantWithMinMaxVars = _graph.node(id_block_4_1_FakeQuantWithMinMaxVars);
-    node_block_4_1_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "block_4_1_FakeQuantWithMinMaxVars", target });
-    node_block_4_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_4_1_FakeQuantWithMinMaxVars.npy",
-                                                                                         DataLayout::NHWC));
-
-    NodeID id_mul_3_y = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            scalar_4d_shape,
-            DataType::QASYMM8,
-            QuantizationInfo(0.0003921568568330258),
-            DataLayout::NHWC });
+    node_block_4_1_FakeQuantWithMinMaxVars->set_common_node_parameters(
+        NodeParams{"block_4_1_FakeQuantWithMinMaxVars", target});
+    node_block_4_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(
+        data_path, "/cnn_data/edsr_model/block_4_1_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC));
+
+    NodeID id_mul_3_y = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{1}, DataType::QASYMM8, QuantizationInfo(0.0003921568568330258), DataLayout::NHWC});
     INode *node_mul_3_y = _graph.node(id_mul_3_y);
-    node_mul_3_y->set_common_node_parameters(NodeParams{ "mul_3_y", target });
-    node_mul_3_y->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_3_y.npy", DataLayout::NHWC));
-
-    NodeID id_block_3_1_Conv2D_bias = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256 },
-            DataType::S32,
-            QuantizationInfo(1.1937011095142225e-06),
-            DataLayout::NHWC });
+    node_mul_3_y->set_common_node_parameters(NodeParams{"mul_3_y", target});
+    node_mul_3_y->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_3_y.npy", DataLayout::NHWC));
+
+    NodeID id_block_3_1_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{256}, DataType::S32, QuantizationInfo(1.1937011095142225e-06), DataLayout::NHWC});
     INode *node_block_3_1_Conv2D_bias = _graph.node(id_block_3_1_Conv2D_bias);
-    node_block_3_1_Conv2D_bias->set_common_node_parameters(NodeParams{ "block_3_1_Conv2D_bias", target });
-    node_block_3_1_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_3_1_Conv2D_bias.npy", DataLayout::NHWC));
+    node_block_3_1_Conv2D_bias->set_common_node_parameters(NodeParams{"block_3_1_Conv2D_bias", target});
+    node_block_3_1_Conv2D_bias->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/block_3_1_Conv2D_bias.npy", DataLayout::NHWC));
 
     NodeID id_block_3_1_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256, 3, 3, 256 },
-            DataType::QASYMM8,
-            QuantizationInfo(0.0003944312920793891, 129),
-            DataLayout::NHWC });
+        TensorDescriptor{TensorShape{256, 3, 3, 256}, DataType::QASYMM8,
+                         QuantizationInfo(0.0003944312920793891, 129), DataLayout::NHWC});
     INode *node_block_3_1_FakeQuantWithMinMaxVars = _graph.node(id_block_3_1_FakeQuantWithMinMaxVars);
-    node_block_3_1_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "block_3_1_FakeQuantWithMinMaxVars", target });
-    node_block_3_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_3_1_FakeQuantWithMinMaxVars.npy",
-                                                                                         DataLayout::NHWC));
-
-    NodeID id_mul_2_y = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            scalar_4d_shape,
-            DataType::QASYMM8,
-            QuantizationInfo(0.0003921568568330258),
-            DataLayout::NHWC });
+    node_block_3_1_FakeQuantWithMinMaxVars->set_common_node_parameters(
+        NodeParams{"block_3_1_FakeQuantWithMinMaxVars", target});
+    node_block_3_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(
+        data_path, "/cnn_data/edsr_model/block_3_1_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC));
+
+    NodeID id_mul_2_y = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{1}, DataType::QASYMM8, QuantizationInfo(0.0003921568568330258), DataLayout::NHWC});
     INode *node_mul_2_y = _graph.node(id_mul_2_y);
-    node_mul_2_y->set_common_node_parameters(NodeParams{ "mul_2_y", target });
-    node_mul_2_y->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_2_y.npy", DataLayout::NHWC));
-
-    NodeID id_block_2_1_Conv2D_bias = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256 },
-            DataType::S32,
-            QuantizationInfo(1.1634580232566805e-06),
-            DataLayout::NHWC });
+    node_mul_2_y->set_common_node_parameters(NodeParams{"mul_2_y", target});
+    node_mul_2_y->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_2_y.npy", DataLayout::NHWC));
+
+    NodeID id_block_2_1_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{256}, DataType::S32, QuantizationInfo(1.1634580232566805e-06), DataLayout::NHWC});
     INode *node_block_2_1_Conv2D_bias = _graph.node(id_block_2_1_Conv2D_bias);
-    node_block_2_1_Conv2D_bias->set_common_node_parameters(NodeParams{ "block_2_1_Conv2D_bias", target });
-    node_block_2_1_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_2_1_Conv2D_bias.npy", DataLayout::NHWC));
+    node_block_2_1_Conv2D_bias->set_common_node_parameters(NodeParams{"block_2_1_Conv2D_bias", target});
+    node_block_2_1_Conv2D_bias->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/block_2_1_Conv2D_bias.npy", DataLayout::NHWC));
 
     NodeID id_block_2_1_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256, 3, 3, 256 },
-            DataType::QASYMM8,
-            QuantizationInfo(0.0003789655165746808, 132),
-            DataLayout::NHWC });
+        TensorDescriptor{TensorShape{256, 3, 3, 256}, DataType::QASYMM8,
+                         QuantizationInfo(0.0003789655165746808, 132), DataLayout::NHWC});
     INode *node_block_2_1_FakeQuantWithMinMaxVars = _graph.node(id_block_2_1_FakeQuantWithMinMaxVars);
-    node_block_2_1_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "block_2_1_FakeQuantWithMinMaxVars", target });
-    node_block_2_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_2_1_FakeQuantWithMinMaxVars.npy",
-                                                                                         DataLayout::NHWC));
-
-    NodeID id_mul_1_y = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            scalar_4d_shape,
-            DataType::QASYMM8,
-            QuantizationInfo(0.0003921568568330258),
-            DataLayout::NHWC });
+    node_block_2_1_FakeQuantWithMinMaxVars->set_common_node_parameters(
+        NodeParams{"block_2_1_FakeQuantWithMinMaxVars", target});
+    node_block_2_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(
+        data_path, "/cnn_data/edsr_model/block_2_1_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC));
+
+    NodeID id_mul_1_y = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{1}, DataType::QASYMM8, QuantizationInfo(0.0003921568568330258), DataLayout::NHWC});
     INode *node_mul_1_y = _graph.node(id_mul_1_y);
-    node_mul_1_y->set_common_node_parameters(NodeParams{ "mul_1_y", target });
-    node_mul_1_y->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_1_y.npy", DataLayout::NHWC));
-
-    NodeID id_block_1_1_Conv2D_bias = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256 },
-            DataType::S32,
-            QuantizationInfo(1.197920255435747e-06),
-            DataLayout::NHWC });
+    node_mul_1_y->set_common_node_parameters(NodeParams{"mul_1_y", target});
+    node_mul_1_y->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_1_y.npy", DataLayout::NHWC));
+
+    NodeID id_block_1_1_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{256}, DataType::S32, QuantizationInfo(1.197920255435747e-06), DataLayout::NHWC});
     INode *node_block_1_1_Conv2D_bias = _graph.node(id_block_1_1_Conv2D_bias);
-    node_block_1_1_Conv2D_bias->set_common_node_parameters(NodeParams{ "block_1_1_Conv2D_bias", target });
-    node_block_1_1_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_1_1_Conv2D_bias.npy", DataLayout::NHWC));
+    node_block_1_1_Conv2D_bias->set_common_node_parameters(NodeParams{"block_1_1_Conv2D_bias", target});
+    node_block_1_1_Conv2D_bias->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/block_1_1_Conv2D_bias.npy", DataLayout::NHWC));
 
     NodeID id_block_1_1_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256, 3, 3, 256 },
-            DataType::QASYMM8,
-            QuantizationInfo(0.00038527738070115447, 132),
-            DataLayout::NHWC });
+        TensorDescriptor{TensorShape{256, 3, 3, 256}, DataType::QASYMM8,
+                         QuantizationInfo(0.00038527738070115447, 132), DataLayout::NHWC});
     INode *node_block_1_1_FakeQuantWithMinMaxVars = _graph.node(id_block_1_1_FakeQuantWithMinMaxVars);
-    node_block_1_1_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "block_1_1_FakeQuantWithMinMaxVars", target });
-    node_block_1_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_1_1_FakeQuantWithMinMaxVars.npy",
-                                                                                         DataLayout::NHWC));
-
-    NodeID id_mul_y = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            scalar_4d_shape,
-            DataType::QASYMM8,
-            QuantizationInfo(0.0003921568568330258),
-            DataLayout::NHWC });
+    node_block_1_1_FakeQuantWithMinMaxVars->set_common_node_parameters(
+        NodeParams{"block_1_1_FakeQuantWithMinMaxVars", target});
+    node_block_1_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(
+        data_path, "/cnn_data/edsr_model/block_1_1_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC));
+
+    NodeID id_mul_y = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{1}, DataType::QASYMM8, QuantizationInfo(0.0003921568568330258), DataLayout::NHWC});
     INode *node_mul_y = _graph.node(id_mul_y);
-    node_mul_y->set_common_node_parameters(NodeParams{ "mul_y", target });
-    node_mul_y->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_y.npy", DataLayout::NHWC));
-
-    NodeID id_block_0_1_Conv2D_bias = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256 },
-            DataType::S32,
-            QuantizationInfo(1.315485519626236e-06),
-            DataLayout::NHWC });
+    node_mul_y->set_common_node_parameters(NodeParams{"mul_y", target});
+    node_mul_y->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/mul_y.npy", DataLayout::NHWC));
+
+    NodeID id_block_0_1_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{256}, DataType::S32, QuantizationInfo(1.315485519626236e-06), DataLayout::NHWC});
     INode *node_block_0_1_Conv2D_bias = _graph.node(id_block_0_1_Conv2D_bias);
-    node_block_0_1_Conv2D_bias->set_common_node_parameters(NodeParams{ "block_0_1_Conv2D_bias", target });
-    node_block_0_1_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_0_1_Conv2D_bias.npy", DataLayout::NHWC));
+    node_block_0_1_Conv2D_bias->set_common_node_parameters(NodeParams{"block_0_1_Conv2D_bias", target});
+    node_block_0_1_Conv2D_bias->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/block_0_1_Conv2D_bias.npy", DataLayout::NHWC));
 
     NodeID id_block_0_1_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256, 3, 3, 256 },
-            DataType::QASYMM8,
-            QuantizationInfo(0.00039420535904355347, 129),
-            DataLayout::NHWC });
+        TensorDescriptor{TensorShape{256, 3, 3, 256}, DataType::QASYMM8,
+                         QuantizationInfo(0.00039420535904355347, 129), DataLayout::NHWC});
     INode *node_block_0_1_FakeQuantWithMinMaxVars = _graph.node(id_block_0_1_FakeQuantWithMinMaxVars);
-    node_block_0_1_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "block_0_1_FakeQuantWithMinMaxVars", target });
-    node_block_0_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/block_0_1_FakeQuantWithMinMaxVars.npy",
-                                                                                         DataLayout::NHWC));
-
-    NodeID id_pre_residual_Conv2D_bias = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 256 },
-            DataType::S32,
-            QuantizationInfo(1.7214160834555514e-06),
-            DataLayout::NHWC });
+    node_block_0_1_FakeQuantWithMinMaxVars->set_common_node_parameters(
+        NodeParams{"block_0_1_FakeQuantWithMinMaxVars", target});
+    node_block_0_1_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(
+        data_path, "/cnn_data/edsr_model/block_0_1_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC));
+
+    NodeID id_pre_residual_Conv2D_bias = _graph.add_node<ConstNode>(TensorDescriptor{
+        TensorShape{256}, DataType::S32, QuantizationInfo(1.7214160834555514e-06), DataLayout::NHWC});
     INode *node_pre_residual_Conv2D_bias = _graph.node(id_pre_residual_Conv2D_bias);
-    node_pre_residual_Conv2D_bias->set_common_node_parameters(NodeParams{ "pre_residual_Conv2D_bias", target });
-    node_pre_residual_Conv2D_bias->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/pre_residual_Conv2D_bias.npy", DataLayout::NHWC));
+    node_pre_residual_Conv2D_bias->set_common_node_parameters(NodeParams{"pre_residual_Conv2D_bias", target});
+    node_pre_residual_Conv2D_bias->output(0)->set_accessor(
+        get_weights_accessor(data_path, "/cnn_data/edsr_model/pre_residual_Conv2D_bias.npy", DataLayout::NHWC));
 
     NodeID id_pre_residual_FakeQuantWithMinMaxVars = _graph.add_node<ConstNode>(
-        TensorDescriptor
-        {
-            TensorShape{ 3, 3, 3, 256 },
-            DataType::QASYMM8,
-            QuantizationInfo(0.0004389610840007663, 127),
-            DataLayout::NHWC });
+        TensorDescriptor{TensorShape{3, 3, 3, 256}, DataType::QASYMM8, QuantizationInfo(0.0004389610840007663, 127),
+                         DataLayout::NHWC});
     INode *node_pre_residual_FakeQuantWithMinMaxVars = _graph.node(id_pre_residual_FakeQuantWithMinMaxVars);
-    node_pre_residual_FakeQuantWithMinMaxVars->set_common_node_parameters(NodeParams{ "pre_residual_FakeQuantWithMinMaxVars", target });
-    node_pre_residual_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(data_path, "/cnn_data/edsr_model/pre_residual_FakeQuantWithMinMaxVars.npy",
-                                                                                            DataLayout::NHWC));
+    node_pre_residual_FakeQuantWithMinMaxVars->set_common_node_parameters(
+        NodeParams{"pre_residual_FakeQuantWithMinMaxVars", target});
+    node_pre_residual_FakeQuantWithMinMaxVars->output(0)->set_accessor(get_weights_accessor(
+        data_path, "/cnn_data/edsr_model/pre_residual_FakeQuantWithMinMaxVars.npy", DataLayout::NHWC));
 
     TensorShape input_shape{};
     input_shape.set(0, 3, false).set(1, 360, false).set(2, 640, false).set(3, 1, false);
 
     NodeID id_input = _graph.add_node<InputNode>(
-        TensorDescriptor
-        {
-            input_shape,
-            DataType::QASYMM8,
-            QuantizationInfo(0.003921568859368563),
-            DataLayout::NHWC });
+        TensorDescriptor{input_shape, DataType::QASYMM8, QuantizationInfo(0.003921568859368563), DataLayout::NHWC});
     INode *node_input = _graph.node(id_input);
-    node_input->set_common_node_parameters(NodeParams{ "input", target });
+    node_input->set_common_node_parameters(NodeParams{"input", target});
     node_input->output(0)->set_accessor(get_input_accessor(common_params));
 
-    NodeID id_pre_residual_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.0033370566088706255, 96));
+    NodeID id_pre_residual_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.0033370566088706255, 96));
     INode *node_pre_residual_BiasAdd = _graph.node(id_pre_residual_BiasAdd);
-    node_pre_residual_BiasAdd->set_common_node_parameters(NodeParams{ "pre_residual_BiasAdd", target });
+    node_pre_residual_BiasAdd->set_common_node_parameters(NodeParams{"pre_residual_BiasAdd", target});
     _graph.add_connection(id_input, 0, id_pre_residual_BiasAdd, 0);
     _graph.add_connection(id_pre_residual_FakeQuantWithMinMaxVars, 0, id_pre_residual_BiasAdd, 1);
     _graph.add_connection(id_pre_residual_Conv2D_bias, 0, id_pre_residual_BiasAdd, 2);
 
-    NodeID id_block_0_1_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.007344874087721109, 185));
+    NodeID id_block_0_1_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.007344874087721109, 185));
     INode *node_block_0_1_BiasAdd = _graph.node(id_block_0_1_BiasAdd);
-    node_block_0_1_BiasAdd->set_common_node_parameters(NodeParams{ "block_0_1_BiasAdd", target });
+    node_block_0_1_BiasAdd->set_common_node_parameters(NodeParams{"block_0_1_BiasAdd", target});
     _graph.add_connection(id_pre_residual_BiasAdd, 0, id_block_0_1_BiasAdd, 0);
     _graph.add_connection(id_block_0_1_FakeQuantWithMinMaxVars, 0, id_block_0_1_BiasAdd, 1);
     _graph.add_connection(id_block_0_1_Conv2D_bias, 0, id_block_0_1_BiasAdd, 2);
 
     NodeID id_mul = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Mul, QuantizationInfo{ 0.0006341293919831514, 174 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Mul, QuantizationInfo{0.0006341293919831514, 174}});
     INode *node_mul = _graph.node(id_mul);
-    node_mul->set_common_node_parameters(NodeParams{ "mul", target });
+    node_mul->set_common_node_parameters(NodeParams{"mul", target});
     _graph.add_connection(id_block_0_1_BiasAdd, 0, id_mul, 0);
     _graph.add_connection(id_mul_y, 0, id_mul, 1);
 
     NodeID id_add = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add, QuantizationInfo{ 0.0031092411372810602, 95 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add, QuantizationInfo{0.0031092411372810602, 95}});
     INode *node_add = _graph.node(id_add);
-    node_add->set_common_node_parameters(NodeParams{ "add", target });
+    node_add->set_common_node_parameters(NodeParams{"add", target});
     _graph.add_connection(id_pre_residual_BiasAdd, 0, id_add, 0);
     _graph.add_connection(id_mul, 0, id_add, 1);
 
-    NodeID id_block_1_1_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.005333727691322565, 117));
+    NodeID id_block_1_1_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.005333727691322565, 117));
    INode *node_block_1_1_BiasAdd = _graph.node(id_block_1_1_BiasAdd);
-    node_block_1_1_BiasAdd->set_common_node_parameters(NodeParams{ "block_1_1_BiasAdd", target });
+    node_block_1_1_BiasAdd->set_common_node_parameters(NodeParams{"block_1_1_BiasAdd", target});
     _graph.add_connection(id_add, 0, id_block_1_1_BiasAdd, 0);
     _graph.add_connection(id_block_1_1_FakeQuantWithMinMaxVars, 0, id_block_1_1_BiasAdd, 1);
     _graph.add_connection(id_block_1_1_Conv2D_bias, 0, id_block_1_1_BiasAdd, 2);
 
     NodeID id_mul_1 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Mul, QuantizationInfo{ 0.0004965941770933568, 122 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Mul, QuantizationInfo{0.0004965941770933568, 122}});
     INode *node_mul_1 = _graph.node(id_mul_1);
-    node_mul_1->set_common_node_parameters(NodeParams{ "mul_1", target });
+    node_mul_1->set_common_node_parameters(NodeParams{"mul_1", target});
     _graph.add_connection(id_block_1_1_BiasAdd, 0, id_mul_1, 0);
     _graph.add_connection(id_mul_1_y, 0, id_mul_1, 1);
 
     NodeID id_add_1 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add, QuantizationInfo{ 0.0030700892675668, 96 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add, QuantizationInfo{0.0030700892675668, 96}});
     INode *node_add_1 = _graph.node(id_add_1);
-    node_add_1->set_common_node_parameters(NodeParams{ "add_1", target });
+    node_add_1->set_common_node_parameters(NodeParams{"add_1", target});
     _graph.add_connection(id_add, 0, id_add_1, 0);
     _graph.add_connection(id_mul_1, 0, id_add_1, 1);
 
-    NodeID id_block_2_1_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.004199742339551449, 132));
+    NodeID id_block_2_1_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.004199742339551449, 132));
     INode *node_block_2_1_BiasAdd = _graph.node(id_block_2_1_BiasAdd);
-    node_block_2_1_BiasAdd->set_common_node_parameters(NodeParams{ "block_2_1_BiasAdd", target });
+    node_block_2_1_BiasAdd->set_common_node_parameters(NodeParams{"block_2_1_BiasAdd", target});
     _graph.add_connection(id_add_1, 0, id_block_2_1_BiasAdd, 0);
     _graph.add_connection(id_block_2_1_FakeQuantWithMinMaxVars, 0, id_block_2_1_BiasAdd, 1);
     _graph.add_connection(id_block_2_1_Conv2D_bias, 0, id_block_2_1_BiasAdd, 2);
 
     NodeID id_mul_2 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Mul, QuantizationInfo{ 0.0004133903712499887, 130 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Mul, QuantizationInfo{0.0004133903712499887, 130}});
     INode *node_mul_2 = _graph.node(id_mul_2);
-    node_mul_2->set_common_node_parameters(NodeParams{ "mul_2", target });
+    node_mul_2->set_common_node_parameters(NodeParams{"mul_2", target});
     _graph.add_connection(id_block_2_1_BiasAdd, 0, id_mul_2, 0);
     _graph.add_connection(id_mul_2_y, 0, id_mul_2, 1);
 
     NodeID id_add_2 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add, QuantizationInfo{ 0.003026385325938463, 94 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add, QuantizationInfo{0.003026385325938463, 94}});
     INode *node_add_2 = _graph.node(id_add_2);
-    node_add_2->set_common_node_parameters(NodeParams{ "add_2", target });
+    node_add_2->set_common_node_parameters(NodeParams{"add_2", target});
     _graph.add_connection(id_add_1, 0, id_add_2, 0);
     _graph.add_connection(id_mul_2, 0, id_add_2, 1);
 
-    NodeID id_block_3_1_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.003977528307586908, 142));
+    NodeID id_block_3_1_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.003977528307586908, 142));
     INode *node_block_3_1_BiasAdd = _graph.node(id_block_3_1_BiasAdd);
-    node_block_3_1_BiasAdd->set_common_node_parameters(NodeParams{ "block_3_1_BiasAdd", target });
+    node_block_3_1_BiasAdd->set_common_node_parameters(NodeParams{"block_3_1_BiasAdd", target});
     _graph.add_connection(id_add_2, 0, id_block_3_1_BiasAdd, 0);
     _graph.add_connection(id_block_3_1_FakeQuantWithMinMaxVars, 0, id_block_3_1_BiasAdd, 1);
     _graph.add_connection(id_block_3_1_Conv2D_bias, 0, id_block_3_1_BiasAdd, 2);
 
     NodeID id_mul_3 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Mul, QuantizationInfo{ 0.0003943995980080217, 141 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Mul, QuantizationInfo{0.0003943995980080217, 141}});
     INode *node_mul_3 = _graph.node(id_mul_3);
-    node_mul_3->set_common_node_parameters(NodeParams{ "mul_3", target });
+    node_mul_3->set_common_node_parameters(NodeParams{"mul_3", target});
     _graph.add_connection(id_block_3_1_BiasAdd, 0, id_mul_3, 0);
     _graph.add_connection(id_mul_3_y, 0, id_mul_3, 1);
 
     NodeID id_add_3 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add, QuantizationInfo{ 0.003101327223703265, 98 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add, QuantizationInfo{0.003101327223703265, 98}});
     INode *node_add_3 = _graph.node(id_add_3);
-    node_add_3->set_common_node_parameters(NodeParams{ "add_3", target });
+    node_add_3->set_common_node_parameters(NodeParams{"add_3", target});
     _graph.add_connection(id_add_2, 0, id_add_3, 0);
     _graph.add_connection(id_mul_3, 0, id_add_3, 1);
 
-    NodeID id_block_4_1_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.0045388080179691315, 146));
+    NodeID id_block_4_1_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.0045388080179691315, 146));
     INode *node_block_4_1_BiasAdd = _graph.node(id_block_4_1_BiasAdd);
-    node_block_4_1_BiasAdd->set_common_node_parameters(NodeParams{ "block_4_1_BiasAdd", target });
+    node_block_4_1_BiasAdd->set_common_node_parameters(NodeParams{"block_4_1_BiasAdd", target});
     _graph.add_connection(id_add_3, 0, id_block_4_1_BiasAdd, 0);
     _graph.add_connection(id_block_4_1_FakeQuantWithMinMaxVars, 0, id_block_4_1_BiasAdd, 1);
     _graph.add_connection(id_block_4_1_Conv2D_bias, 0, id_block_4_1_BiasAdd, 2);
 
     NodeID id_mul_4 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Mul, QuantizationInfo{ 0.00044342130422592163, 143 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Mul, QuantizationInfo{0.00044342130422592163, 143}});
     INode *node_mul_4 = _graph.node(id_mul_4);
-    node_mul_4->set_common_node_parameters(NodeParams{ "mul_4", target });
+    node_mul_4->set_common_node_parameters(NodeParams{"mul_4", target});
     _graph.add_connection(id_block_4_1_BiasAdd, 0, id_mul_4, 0);
     _graph.add_connection(id_mul_4_y, 0, id_mul_4, 1);
 
     NodeID id_add_4 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add, QuantizationInfo{ 0.003150839824229479, 98 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add, QuantizationInfo{0.003150839824229479, 98}});
     INode *node_add_4 = _graph.node(id_add_4);
-    node_add_4->set_common_node_parameters(NodeParams{ "add_4", target });
+    node_add_4->set_common_node_parameters(NodeParams{"add_4", target});
     _graph.add_connection(id_add_3, 0, id_add_4, 0);
     _graph.add_connection(id_mul_4, 0, id_add_4, 1);
 
-    NodeID id_block_5_1_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.00402890844270587, 132));
+    NodeID id_block_5_1_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.00402890844270587, 132));
     INode *node_block_5_1_BiasAdd = _graph.node(id_block_5_1_BiasAdd);
-    node_block_5_1_BiasAdd->set_common_node_parameters(NodeParams{ "block_5_1_BiasAdd", target });
+    node_block_5_1_BiasAdd->set_common_node_parameters(NodeParams{"block_5_1_BiasAdd", target});
     _graph.add_connection(id_add_4, 0, id_block_5_1_BiasAdd, 0);
     _graph.add_connection(id_block_5_1_FakeQuantWithMinMaxVars, 0, id_block_5_1_BiasAdd, 1);
     _graph.add_connection(id_block_5_1_Conv2D_bias, 0, id_block_5_1_BiasAdd, 2);
 
     NodeID id_mul_5 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Mul, QuantizationInfo{ 0.0004023382789455354, 132 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Mul, QuantizationInfo{0.0004023382789455354, 132}});
     INode *node_mul_5 = _graph.node(id_mul_5);
-    node_mul_5->set_common_node_parameters(NodeParams{ "mul_5", target });
+    node_mul_5->set_common_node_parameters(NodeParams{"mul_5", target});
     _graph.add_connection(id_block_5_1_BiasAdd, 0, id_mul_5, 0);
     _graph.add_connection(id_mul_5_y, 0, id_mul_5, 1);
 
     NodeID id_add_5 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add, QuantizationInfo{ 0.0030975888948887587, 94 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add, QuantizationInfo{0.0030975888948887587, 94}});
     INode *node_add_5 = _graph.node(id_add_5);
-    node_add_5->set_common_node_parameters(NodeParams{ "add_5", target });
+    node_add_5->set_common_node_parameters(NodeParams{"add_5", target});
     _graph.add_connection(id_add_4, 0, id_add_5, 0);
     _graph.add_connection(id_mul_5, 0, id_add_5, 1);
 
-    NodeID id_block_6_1_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.00421866774559021, 125));
+    NodeID id_block_6_1_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.00421866774559021, 125));
     INode *node_block_6_1_BiasAdd = _graph.node(id_block_6_1_BiasAdd);
-    node_block_6_1_BiasAdd->set_common_node_parameters(NodeParams{ "block_6_1_BiasAdd", target });
+    node_block_6_1_BiasAdd->set_common_node_parameters(NodeParams{"block_6_1_BiasAdd", target});
     _graph.add_connection(id_add_5, 0, id_block_6_1_BiasAdd, 0);
     _graph.add_connection(id_block_6_1_FakeQuantWithMinMaxVars, 0, id_block_6_1_BiasAdd, 1);
     _graph.add_connection(id_block_6_1_Conv2D_bias, 0, id_block_6_1_BiasAdd, 2);
 
     NodeID id_mul_6 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Mul, QuantizationInfo{ 0.00041950203012675047, 125 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Mul, QuantizationInfo{0.00041950203012675047, 125}});
     INode *node_mul_6 = _graph.node(id_mul_6);
-    node_mul_6->set_common_node_parameters(NodeParams{ "mul_6", target });
+    node_mul_6->set_common_node_parameters(NodeParams{"mul_6", target});
     _graph.add_connection(id_block_6_1_BiasAdd, 0, id_mul_6, 0);
     _graph.add_connection(id_mul_6_y, 0, id_mul_6, 1);
 
     NodeID id_add_6 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add, QuantizationInfo{ 0.003155382815748453, 92 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add, QuantizationInfo{0.003155382815748453, 92}});
     INode *node_add_6 = _graph.node(id_add_6);
-    node_add_6->set_common_node_parameters(NodeParams{ "add_6", target });
+    node_add_6->set_common_node_parameters(NodeParams{"add_6", target});
     _graph.add_connection(id_add_5, 0, id_add_6, 0);
     _graph.add_connection(id_mul_6, 0, id_add_6, 1);
 
-    NodeID id_block_7_1_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.004250136204063892, 143));
+    NodeID id_block_7_1_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.004250136204063892, 143));
     INode *node_block_7_1_BiasAdd = _graph.node(id_block_7_1_BiasAdd);
-    node_block_7_1_BiasAdd->set_common_node_parameters(NodeParams{ "block_7_1_BiasAdd", target });
+    node_block_7_1_BiasAdd->set_common_node_parameters(NodeParams{"block_7_1_BiasAdd", target});
     _graph.add_connection(id_add_6, 0, id_block_7_1_BiasAdd, 0);
     _graph.add_connection(id_block_7_1_FakeQuantWithMinMaxVars, 0, id_block_7_1_BiasAdd, 1);
     _graph.add_connection(id_block_7_1_Conv2D_bias, 0, id_block_7_1_BiasAdd, 2);
 
     NodeID id_mul_7 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Mul, QuantizationInfo{ 0.00042401350219734013, 142 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Mul, QuantizationInfo{0.00042401350219734013, 142}});
     INode *node_mul_7 = _graph.node(id_mul_7);
-    node_mul_7->set_common_node_parameters(NodeParams{ "mul_7", target });
+    node_mul_7->set_common_node_parameters(NodeParams{"mul_7", target});
     _graph.add_connection(id_block_7_1_BiasAdd, 0, id_mul_7, 0);
     _graph.add_connection(id_mul_7_y, 0, id_mul_7, 1);
 
     NodeID id_add_7 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add, QuantizationInfo{ 0.0031760605052113533, 86 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add, QuantizationInfo{0.0031760605052113533, 86}});
     INode *node_add_7 = _graph.node(id_add_7);
-    node_add_7->set_common_node_parameters(NodeParams{ "add_7", target });
+    node_add_7->set_common_node_parameters(NodeParams{"add_7", target});
     _graph.add_connection(id_add_6, 0, id_add_7, 0);
     _graph.add_connection(id_mul_7, 0, id_add_7, 1);
 
-    NodeID id_block_8_1_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.004277155734598637, 123));
+    NodeID id_block_8_1_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.004277155734598637, 123));
     INode *node_block_8_1_BiasAdd = _graph.node(id_block_8_1_BiasAdd);
-    node_block_8_1_BiasAdd->set_common_node_parameters(NodeParams{ "block_8_1_BiasAdd", target });
+    node_block_8_1_BiasAdd->set_common_node_parameters(NodeParams{"block_8_1_BiasAdd", target});
     _graph.add_connection(id_add_7, 0, id_block_8_1_BiasAdd, 0);
     _graph.add_connection(id_block_8_1_FakeQuantWithMinMaxVars, 0, id_block_8_1_BiasAdd, 1);
     _graph.add_connection(id_block_8_1_Conv2D_bias, 0, id_block_8_1_BiasAdd, 2);
 
     NodeID id_mul_8 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Mul, QuantizationInfo{ 0.00042673019925132394, 123 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Mul, QuantizationInfo{0.00042673019925132394, 123}});
     INode *node_mul_8 = _graph.node(id_mul_8);
-    node_mul_8->set_common_node_parameters(NodeParams{ "mul_8", target });
+    node_mul_8->set_common_node_parameters(NodeParams{"mul_8", target});
     _graph.add_connection(id_block_8_1_BiasAdd, 0, id_mul_8, 0);
     _graph.add_connection(id_mul_8_y, 0, id_mul_8, 1);
 
     NodeID id_add_8 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add, QuantizationInfo{ 0.0032156009692698717, 86 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add, QuantizationInfo{0.0032156009692698717, 86}});
     INode *node_add_8 = _graph.node(id_add_8);
-    node_add_8->set_common_node_parameters(NodeParams{ "add_8", target });
+    node_add_8->set_common_node_parameters(NodeParams{"add_8", target});
     _graph.add_connection(id_add_7, 0, id_add_8, 0);
     _graph.add_connection(id_mul_8, 0, id_add_8, 1);
 
-    NodeID id_block_9_1_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.00445037754252553, 129));
+    NodeID id_block_9_1_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.00445037754252553, 129));
     INode *node_block_9_1_BiasAdd = _graph.node(id_block_9_1_BiasAdd);
-    node_block_9_1_BiasAdd->set_common_node_parameters(NodeParams{ "block_9_1_BiasAdd", target });
+    node_block_9_1_BiasAdd->set_common_node_parameters(NodeParams{"block_9_1_BiasAdd", target});
     _graph.add_connection(id_add_8, 0, id_block_9_1_BiasAdd, 0);
     _graph.add_connection(id_block_9_1_FakeQuantWithMinMaxVars, 0, id_block_9_1_BiasAdd, 1);
     _graph.add_connection(id_block_9_1_Conv2D_bias, 0, id_block_9_1_BiasAdd, 2);
 
     NodeID id_mul_9 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Mul, QuantizationInfo{ 0.0004448975087143481, 129 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Mul, QuantizationInfo{0.0004448975087143481, 129}});
     INode *node_mul_9 = _graph.node(id_mul_9);
-    node_mul_9->set_common_node_parameters(NodeParams{ "mul_9", target });
+    node_mul_9->set_common_node_parameters(NodeParams{"mul_9", target});
     _graph.add_connection(id_block_9_1_BiasAdd, 0, id_mul_9, 0);
     _graph.add_connection(id_mul_9_y, 0, id_mul_9, 1);
 
     NodeID id_add_9 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add, QuantizationInfo{ 0.0032742770854383707, 80 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add, QuantizationInfo{0.0032742770854383707, 80}});
     INode *node_add_9 = _graph.node(id_add_9);
-    node_add_9->set_common_node_parameters(NodeParams{ "add_9", target });
+    node_add_9->set_common_node_parameters(NodeParams{"add_9", target});
     _graph.add_connection(id_add_8, 0, id_add_9, 0);
     _graph.add_connection(id_mul_9, 0, id_add_9, 1);
 
-    NodeID id_block_10_1_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.003614710411056876, 131));
+    NodeID id_block_10_1_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.003614710411056876, 131));
     INode *node_block_10_1_BiasAdd = _graph.node(id_block_10_1_BiasAdd);
-    node_block_10_1_BiasAdd->set_common_node_parameters(NodeParams{ "block_10_1_BiasAdd", target });
+    node_block_10_1_BiasAdd->set_common_node_parameters(NodeParams{"block_10_1_BiasAdd", target});
     _graph.add_connection(id_add_9, 0, id_block_10_1_BiasAdd, 0);
     _graph.add_connection(id_block_10_1_FakeQuantWithMinMaxVars, 0, id_block_10_1_BiasAdd, 1);
     _graph.add_connection(id_block_10_1_Conv2D_bias, 0, id_block_10_1_BiasAdd, 2);
 
     NodeID id_mul_10 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Mul, QuantizationInfo{ 0.00036083892337046564, 130 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Mul, QuantizationInfo{0.00036083892337046564, 130}});
     INode *node_mul_10 = _graph.node(id_mul_10);
-    node_mul_10->set_common_node_parameters(NodeParams{ "mul_10", target });
+    node_mul_10->set_common_node_parameters(NodeParams{"mul_10", target});
     _graph.add_connection(id_block_10_1_BiasAdd, 0, id_mul_10, 0);
     _graph.add_connection(id_mul_10_y, 0, id_mul_10, 1);
 
     NodeID id_add_10 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add, QuantizationInfo{ 0.0031881770119071007, 81 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add, QuantizationInfo{0.0031881770119071007, 81}});
     INode *node_add_10 = _graph.node(id_add_10);
-    node_add_10->set_common_node_parameters(NodeParams{ "add_10", target });
+    node_add_10->set_common_node_parameters(NodeParams{"add_10", target});
     _graph.add_connection(id_add_9, 0, id_add_10, 0);
     _graph.add_connection(id_mul_10, 0, id_add_10, 1);
 
-    NodeID id_block_11_1_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.003969002980738878, 133));
+    NodeID id_block_11_1_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.003969002980738878, 133));
     INode *node_block_11_1_BiasAdd = _graph.node(id_block_11_1_BiasAdd);
-    node_block_11_1_BiasAdd->set_common_node_parameters(NodeParams{ "block_11_1_BiasAdd", target });
+    node_block_11_1_BiasAdd->set_common_node_parameters(NodeParams{"block_11_1_BiasAdd", target});
     _graph.add_connection(id_add_10, 0, id_block_11_1_BiasAdd, 0);
     _graph.add_connection(id_block_11_1_FakeQuantWithMinMaxVars, 0, id_block_11_1_BiasAdd, 1);
     _graph.add_connection(id_block_11_1_Conv2D_bias, 0, id_block_11_1_BiasAdd, 2);
 
     NodeID id_mul_11 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Mul, QuantizationInfo{ 0.0003968806122429669, 133 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Mul, QuantizationInfo{0.0003968806122429669, 133}});
     INode *node_mul_11 = _graph.node(id_mul_11);
-    node_mul_11->set_common_node_parameters(NodeParams{ "mul_11", target });
+    node_mul_11->set_common_node_parameters(NodeParams{"mul_11", target});
     _graph.add_connection(id_block_11_1_BiasAdd, 0, id_mul_11, 0);
     _graph.add_connection(id_mul_11_y, 0, id_mul_11, 1);
 
     NodeID id_add_11 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add, QuantizationInfo{ 0.0032707711216062307, 80 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add, QuantizationInfo{0.0032707711216062307, 80}});
     INode *node_add_11 = _graph.node(id_add_11);
-    node_add_11->set_common_node_parameters(NodeParams{ "add_11", target });
+    node_add_11->set_common_node_parameters(NodeParams{"add_11", target});
     _graph.add_connection(id_add_10, 0, id_add_11, 0);
     _graph.add_connection(id_mul_11, 0, id_add_11, 1);
 
-    NodeID id_block_12_1_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.004366801120340824, 110));
+    NodeID id_block_12_1_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.004366801120340824, 110));
     INode *node_block_12_1_BiasAdd = _graph.node(id_block_12_1_BiasAdd);
-    node_block_12_1_BiasAdd->set_common_node_parameters(NodeParams{ "block_12_1_BiasAdd", target });
+    node_block_12_1_BiasAdd->set_common_node_parameters(NodeParams{"block_12_1_BiasAdd", target});
     _graph.add_connection(id_add_11, 0, id_block_12_1_BiasAdd, 0);
     _graph.add_connection(id_block_12_1_FakeQuantWithMinMaxVars, 0, id_block_12_1_BiasAdd, 1);
     _graph.add_connection(id_block_12_1_Conv2D_bias, 0, id_block_12_1_BiasAdd, 2);
 
     NodeID id_mul_12 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Mul, QuantizationInfo{ 0.0004365936329122633, 110 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Mul, QuantizationInfo{0.0004365936329122633, 110}});
     INode *node_mul_12 = _graph.node(id_mul_12);
-    node_mul_12->set_common_node_parameters(NodeParams{ "mul_12", target });
+    node_mul_12->set_common_node_parameters(NodeParams{"mul_12", target});
     _graph.add_connection(id_block_12_1_BiasAdd, 0, id_mul_12, 0);
     _graph.add_connection(id_mul_12_y, 0, id_mul_12, 1);
 
     NodeID id_add_12 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add, QuantizationInfo{ 0.003275055903941393, 79 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add, QuantizationInfo{0.003275055903941393, 79}});
     INode *node_add_12 = _graph.node(id_add_12);
-    node_add_12->set_common_node_parameters(NodeParams{ "add_12", target });
+    node_add_12->set_common_node_parameters(NodeParams{"add_12", target});
     _graph.add_connection(id_add_11, 0, id_add_12, 0);
     _graph.add_connection(id_mul_12, 0, id_add_12, 1);
 
-    NodeID id_block_13_1_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.004386766813695431, 139));
+    NodeID id_block_13_1_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.004386766813695431, 139));
     INode *node_block_13_1_BiasAdd = _graph.node(id_block_13_1_BiasAdd);
-    node_block_13_1_BiasAdd->set_common_node_parameters(NodeParams{ "block_13_1_BiasAdd", target });
+    node_block_13_1_BiasAdd->set_common_node_parameters(NodeParams{"block_13_1_BiasAdd", target});
     _graph.add_connection(id_add_12, 0, id_block_13_1_BiasAdd, 0);
     _graph.add_connection(id_block_13_1_FakeQuantWithMinMaxVars, 0, id_block_13_1_BiasAdd, 1);
     _graph.add_connection(id_block_13_1_Conv2D_bias, 0, id_block_13_1_BiasAdd, 2);
 
     NodeID id_mul_13 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Mul, QuantizationInfo{ 0.0004385628562886268, 139 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Mul, QuantizationInfo{0.0004385628562886268, 139}});
     INode *node_mul_13 = _graph.node(id_mul_13);
-    node_mul_13->set_common_node_parameters(NodeParams{ "mul_13", target });
+    node_mul_13->set_common_node_parameters(NodeParams{"mul_13", target});
     _graph.add_connection(id_block_13_1_BiasAdd, 0, id_mul_13, 0);
     _graph.add_connection(id_mul_13_y, 0, id_mul_13, 1);
 
     NodeID id_add_13 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add, QuantizationInfo{ 0.0033287261612713337, 78 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add, QuantizationInfo{0.0033287261612713337, 78}});
     INode *node_add_13 = _graph.node(id_add_13);
-    node_add_13->set_common_node_parameters(NodeParams{ "add_13", target });
+    node_add_13->set_common_node_parameters(NodeParams{"add_13", target});
     _graph.add_connection(id_add_12, 0, id_add_13, 0);
     _graph.add_connection(id_mul_13, 0, id_add_13, 1);
 
-    NodeID id_block_14_1_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.0038069337606430054, 130));
+    NodeID id_block_14_1_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.0038069337606430054, 130));
     INode *node_block_14_1_BiasAdd = _graph.node(id_block_14_1_BiasAdd);
-    node_block_14_1_BiasAdd->set_common_node_parameters(NodeParams{ "block_14_1_BiasAdd", target });
+    node_block_14_1_BiasAdd->set_common_node_parameters(NodeParams{"block_14_1_BiasAdd", target});
     _graph.add_connection(id_add_13, 0, id_block_14_1_BiasAdd, 0);
     _graph.add_connection(id_block_14_1_FakeQuantWithMinMaxVars, 0, id_block_14_1_BiasAdd, 1);
     _graph.add_connection(id_block_14_1_Conv2D_bias, 0, id_block_14_1_BiasAdd, 2);
 
     NodeID id_mul_14 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Mul, QuantizationInfo{ 0.00037829321809113026, 130 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Mul, QuantizationInfo{0.00037829321809113026, 130}});
     INode *node_mul_14 = _graph.node(id_mul_14);
-    node_mul_14->set_common_node_parameters(NodeParams{ "mul_14", target });
+    node_mul_14->set_common_node_parameters(NodeParams{"mul_14", target});
     _graph.add_connection(id_block_14_1_BiasAdd, 0, id_mul_14, 0);
     _graph.add_connection(id_mul_14_y, 0, id_mul_14, 1);
 
     NodeID id_add_14 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add, QuantizationInfo{ 0.0033590947277843952, 77 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add, QuantizationInfo{0.0033590947277843952, 77}});
     INode *node_add_14 = _graph.node(id_add_14);
-    node_add_14->set_common_node_parameters(NodeParams{ "add_14", target });
+    node_add_14->set_common_node_parameters(NodeParams{"add_14", target});
     _graph.add_connection(id_add_13, 0, id_add_14, 0);
     _graph.add_connection(id_mul_14, 0, id_add_14, 1);
 
-    NodeID id_block_15_1_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.004009159281849861, 130));
+    NodeID id_block_15_1_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.004009159281849861, 130));
     INode *node_block_15_1_BiasAdd = _graph.node(id_block_15_1_BiasAdd);
-    node_block_15_1_BiasAdd->set_common_node_parameters(NodeParams{ "block_15_1_BiasAdd", target });
+    node_block_15_1_BiasAdd->set_common_node_parameters(NodeParams{"block_15_1_BiasAdd", target});
     _graph.add_connection(id_add_14, 0, id_block_15_1_BiasAdd, 0);
     _graph.add_connection(id_block_15_1_FakeQuantWithMinMaxVars, 0, id_block_15_1_BiasAdd, 1);
     _graph.add_connection(id_block_15_1_Conv2D_bias, 0, id_block_15_1_BiasAdd, 2);
 
     NodeID id_mul_15 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Mul, QuantizationInfo{ 0.0004008286341559142, 130 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Mul, QuantizationInfo{0.0004008286341559142, 130}});
     INode *node_mul_15 = _graph.node(id_mul_15);
-    node_mul_15->set_common_node_parameters(NodeParams{ "mul_15", target });
+    node_mul_15->set_common_node_parameters(NodeParams{"mul_15", target});
     _graph.add_connection(id_block_15_1_BiasAdd, 0, id_mul_15, 0);
     _graph.add_connection(id_mul_15_y, 0, id_mul_15, 1);
 
     NodeID id_add_15 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add, QuantizationInfo{ 0.0035031239967793226, 78 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add, QuantizationInfo{0.0035031239967793226, 78}});
     INode *node_add_15 = _graph.node(id_add_15);
-    node_add_15->set_common_node_parameters(NodeParams{ "add_15", target });
+    node_add_15->set_common_node_parameters(NodeParams{"add_15", target});
     _graph.add_connection(id_add_14, 0, id_add_15, 0);
     _graph.add_connection(id_mul_15, 0, id_add_15, 1);
 
-    NodeID id_post_residual_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.005167999770492315, 112));
+    NodeID id_post_residual_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.005167999770492315, 112));
     INode *node_post_residual_BiasAdd = _graph.node(id_post_residual_BiasAdd);
-    node_post_residual_BiasAdd->set_common_node_parameters(NodeParams{ "post_residual_BiasAdd", target });
+    node_post_residual_BiasAdd->set_common_node_parameters(NodeParams{"post_residual_BiasAdd", target});
     _graph.add_connection(id_add_15, 0, id_post_residual_BiasAdd, 0);
     _graph.add_connection(id_post_residual_FakeQuantWithMinMaxVars, 0, id_post_residual_BiasAdd, 1);
     _graph.add_connection(id_post_residual_Conv2D_bias, 0, id_post_residual_BiasAdd, 2);
 
     NodeID id_add_16 = _graph.add_node<EltwiseLayerNode>(
-        descriptors::EltwiseLayerDescriptor{ EltwiseOperation::Add, QuantizationInfo{ 0.0065071373246610165, 89 } });
+        descriptors::EltwiseLayerDescriptor{EltwiseOperation::Add, QuantizationInfo{0.0065071373246610165, 89}});
     INode *node_add_16 = _graph.node(id_add_16);
-    node_add_16->set_common_node_parameters(NodeParams{ "add_16", target });
+    node_add_16->set_common_node_parameters(NodeParams{"add_16", target});
     _graph.add_connection(id_post_residual_BiasAdd, 0, id_add_16, 0);
     _graph.add_connection(id_pre_residual_BiasAdd, 0, id_add_16, 1);
 
-    NodeID id_pre_upscale_BiasAdd = _graph.add_node<ConvolutionLayerNode>(
-        PadStrideInfo
-        {
-            1, 1,
-            1, 1,
-            1, 1,
-            DimensionRoundingType::FLOOR },
-        1,
-        arm_compute::graph::ConvolutionMethod::Default,
-        FastMathHint::Disabled,
-        QuantizationInfo(0.005013593938201666, 26));
+    NodeID id_pre_upscale_BiasAdd =
+        _graph.add_node<ConvolutionLayerNode>(PadStrideInfo{1, 1, 1, 1, 1, 1, DimensionRoundingType::FLOOR}, 1,
+                                              arm_compute::graph::ConvolutionMethod::Default,
+                                              FastMathHint::Disabled, QuantizationInfo(0.005013593938201666, 26));
     INode *node_pre_upscale_BiasAdd = _graph.node(id_pre_upscale_BiasAdd);
-    node_pre_upscale_BiasAdd->set_common_node_parameters(NodeParams{ "pre_upscale_BiasAdd", target });
+    node_pre_upscale_BiasAdd->set_common_node_parameters(NodeParams{"pre_upscale_BiasAdd", target});
     _graph.add_connection(id_add_16, 0, id_pre_upscale_BiasAdd, 0);
     _graph.add_connection(id_pre_upscale_FakeQuantWithMinMaxVars, 0, id_pre_upscale_BiasAdd, 1);
     _graph.add_connection(id_pre_upscale_Conv2D_bias, 0, id_pre_upscale_BiasAdd, 2);
 
     NodeID id_upscale_net_FakeQuantWithMinMaxVars_1 = _graph.add_node<DeconvolutionLayerNode>(
-        descriptors::DeconvolutionLayerDescriptor
-        {
-            PadStrideInfo{
-                2, 2,
-                0, 0,
-                0, 0,
-                DimensionRoundingType::FLOOR },
-            QuantizationInfo{ 0.004990961868315935, 26 } });
+        descriptors::DeconvolutionLayerDescriptor{PadStrideInfo{2, 2, 0, 0, 0, 0, DimensionRoundingType::FLOOR},
+                                                  QuantizationInfo{0.004990961868315935, 26}});
     INode *node_upscale_net_FakeQuantWithMinMaxVars_1 = _graph.node(id_upscale_net_FakeQuantWithMinMaxVars_1);
-    node_upscale_net_FakeQuantWithMinMaxVars_1->set_common_node_parameters(NodeParams{ "upscale_net_FakeQuantWithMinMaxVars_1", target });
+    node_upscale_net_FakeQuantWithMinMaxVars_1->set_common_node_parameters(
+        NodeParams{"upscale_net_FakeQuantWithMinMaxVars_1", target});
     _graph.add_connection(id_pre_upscale_BiasAdd, 0, id_upscale_net_FakeQuantWithMinMaxVars_1, 0);
-    _graph.add_connection(id_upscale_net_FakeQuantWithMinMaxVars_transposed, 0, id_upscale_net_FakeQuantWithMinMaxVars_1, 1);
+    _graph.add_connection(id_upscale_net_FakeQuantWithMinMaxVars_transposed, 0,
+                          id_upscale_net_FakeQuantWithMinMaxVars_1, 1);
 
     TensorShape output_shape;
     output_shape.set(0, 3, false).set(1, 720, false).set(2, 1280, false).set(3, 1, false);
 
     NodeID id_output_140211982446376 = _graph.add_node<OutputNode>();
     INode *node_output_140211982446376 = _graph.node(id_output_140211982446376);
-    node_output_140211982446376->set_common_node_parameters(NodeParams{ "output_140211982446376", target });
+    node_output_140211982446376->set_common_node_parameters(NodeParams{"output_140211982446376", target});
     _graph.add_connection(id_upscale_net_FakeQuantWithMinMaxVars_1, 0, id_output_140211982446376, 0);
-    node_output_140211982446376->input(0)->set_accessor(get_npy_output_accessor(expected_output_filename.value(), output_shape, common_params.data_type,
-                                                                                common_params.data_layout));
+    node_output_140211982446376->input(0)->set_accessor(get_npy_output_accessor(
+        expected_output_filename.value(), output_shape, common_params.data_type, common_params.data_layout));
 
     return true;
 }
diff --git a/examples/graph_googlenet.cpp b/examples/graph_googlenet.cpp
index 84a10ffce1..f431fc412b 100644
--- a/examples/graph_googlenet.cpp
+++ b/examples/graph_googlenet.cpp
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 ARM Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -22,6 +22,7 @@
  * SOFTWARE.
  */
 #include "arm_compute/graph.h"
+
 #include "support/ToolchainSupport.h"
 #include "utils/CommonGraphOptions.h"
 #include "utils/GraphUtils.h"
@@ -35,8 +36,7 @@ using namespace arm_compute::graph_utils;
 class GraphGooglenetExample : public Example
 {
 public:
-    GraphGooglenetExample()
-        : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "GoogleNet")
+    GraphGooglenetExample() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "GoogleNet")
     {
     }
     bool do_setup(int argc, char **argv) override
@@ -49,14 +49,15 @@ public:
         common_params = consume_common_graph_parameters(common_opts);
 
         // Return when help menu is requested
-        if(common_params.help)
+        if (common_params.help)
         {
             cmd_parser.print_help(argv[0]);
             return false;
         }
 
         // Checks
-        ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "QASYMM8 not supported for this graph");
+        ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type),
+                                "QASYMM8 not supported for this graph");
 
         // Print parameter values
         std::cout << common_params << std::endl;
@@ -65,64 +66,99 @@ public:
         std::string data_path = common_params.data_path;
 
         // Create a preprocessor object
-        const std::array<float, 3> mean_rgb{ { 122.68f, 116.67f, 104.01f } };
-        std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb);
+        const std::array<float, 3>     mean_rgb{{122.68f, 116.67f, 104.01f}};
+        std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<CaffePreproccessor>(mean_rgb);
 
         // Create input descriptor
         const auto        operation_layout = common_params.data_layout;
-        const TensorShape tensor_shape     = permute_shape(TensorShape(224U, 224U, 3U, 1U), DataLayout::NCHW, operation_layout);
-        TensorDescriptor  input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout);
+        const TensorShape tensor_shape =
+            permute_shape(TensorShape(224U, 224U, 3U, common_params.batches), DataLayout::NCHW, operation_layout);
+        TensorDescriptor input_descriptor =
+            TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout);
 
         // Set weights trained layout
         const DataLayout weights_layout = DataLayout::NCHW;
 
-        graph << common_params.target
-              << common_params.fast_math_hint
+        graph << common_params.target << common_params.fast_math_hint
               << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor)))
+              << ConvolutionLayer(7U, 7U, 64U,
+                                  get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv1/conv1_7x7_s2_w.npy",
+                                                       weights_layout),
+                                  get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv1/conv1_7x7_s2_b.npy"),
+                                  PadStrideInfo(2, 2, 3, 3))
+                     .set_name("conv1/7x7_s2")
+              << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+                     .set_name("conv1/relu_7x7")
+              << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout,
+                                               PadStrideInfo(2, 2, 0, 0,
DimensionRoundingType::CEIL))) + .set_name("pool1/3x3_s2") + << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)) + .set_name("pool1/norm1") << ConvolutionLayer( - 7U, 7U, 64U, - get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv1/conv1_7x7_s2_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv1/conv1_7x7_s2_b.npy"), - PadStrideInfo(2, 2, 3, 3)) - .set_name("conv1/7x7_s2") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1/relu_7x7") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("pool1/3x3_s2") - << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)).set_name("pool1/norm1") - << ConvolutionLayer( - 1U, 1U, 64U, - get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_reduce_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_reduce_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv2/3x3_reduce") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv2/relu_3x3_reduce") + 1U, 1U, 64U, + get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_reduce_w.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_reduce_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("conv2/3x3_reduce") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv2/relu_3x3_reduce") << ConvolutionLayer( - 3U, 3U, 192U, - get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv2/3x3") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv2/relu_3x3") - << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)).set_name("conv2/norm2") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("pool2/3x3_s2"); - graph << get_inception_node(data_path, "inception_3a", weights_layout, 64, std::make_tuple(96U, 128U), std::make_tuple(16U, 32U), 32U).set_name("inception_3a/concat"); - graph << get_inception_node(data_path, "inception_3b", weights_layout, 128, std::make_tuple(128U, 192U), std::make_tuple(32U, 96U), 64U).set_name("inception_3b/concat"); - graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("pool3/3x3_s2"); - graph << get_inception_node(data_path, "inception_4a", weights_layout, 192, std::make_tuple(96U, 208U), std::make_tuple(16U, 48U), 64U).set_name("inception_4a/concat"); - graph << get_inception_node(data_path, "inception_4b", weights_layout, 160, std::make_tuple(112U, 224U), std::make_tuple(24U, 64U), 64U).set_name("inception_4b/concat"); - graph << get_inception_node(data_path, "inception_4c", weights_layout, 128, std::make_tuple(128U, 256U), std::make_tuple(24U, 64U), 64U).set_name("inception_4c/concat"); - graph << get_inception_node(data_path, "inception_4d", weights_layout, 112, std::make_tuple(144U, 288U), std::make_tuple(32U, 64U), 64U).set_name("inception_4d/concat"); - graph << 
get_inception_node(data_path, "inception_4e", weights_layout, 256, std::make_tuple(160U, 320U), std::make_tuple(32U, 128U), 128U).set_name("inception_4e/concat"); - graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("pool4/3x3_s2"); - graph << get_inception_node(data_path, "inception_5a", weights_layout, 256, std::make_tuple(160U, 320U), std::make_tuple(32U, 128U), 128U).set_name("inception_5a/concat"); - graph << get_inception_node(data_path, "inception_5b", weights_layout, 384, std::make_tuple(192U, 384U), std::make_tuple(48U, 128U), 128U).set_name("inception_5b/concat"); - graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 7, operation_layout, PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL))).set_name("pool5/7x7_s1") + 3U, 3U, 192U, + get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/googlenet_model/conv2/conv2_3x3_b.npy"), + PadStrideInfo(1, 1, 1, 1)) + .set_name("conv2/3x3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv2/relu_3x3") + << NormalizationLayer(NormalizationLayerInfo(NormType::CROSS_MAP, 5, 0.0001f, 0.75f)) + .set_name("conv2/norm2") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + .set_name("pool2/3x3_s2"); + graph << get_inception_node(data_path, "inception_3a", weights_layout, 64, std::make_tuple(96U, 128U), + std::make_tuple(16U, 32U), 32U) + .set_name("inception_3a/concat"); + graph << get_inception_node(data_path, "inception_3b", weights_layout, 128, std::make_tuple(128U, 192U), + std::make_tuple(32U, 96U), 64U) + .set_name("inception_3b/concat"); + graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + .set_name("pool3/3x3_s2"); + graph << get_inception_node(data_path, "inception_4a", weights_layout, 192, std::make_tuple(96U, 208U), + std::make_tuple(16U, 48U), 64U) + .set_name("inception_4a/concat"); + graph << get_inception_node(data_path, "inception_4b", weights_layout, 160, std::make_tuple(112U, 224U), + std::make_tuple(24U, 64U), 64U) + .set_name("inception_4b/concat"); + graph << get_inception_node(data_path, "inception_4c", weights_layout, 128, std::make_tuple(128U, 256U), + std::make_tuple(24U, 64U), 64U) + .set_name("inception_4c/concat"); + graph << get_inception_node(data_path, "inception_4d", weights_layout, 112, std::make_tuple(144U, 288U), + std::make_tuple(32U, 64U), 64U) + .set_name("inception_4d/concat"); + graph << get_inception_node(data_path, "inception_4e", weights_layout, 256, std::make_tuple(160U, 320U), + std::make_tuple(32U, 128U), 128U) + .set_name("inception_4e/concat"); + graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + .set_name("pool4/3x3_s2"); + graph << get_inception_node(data_path, "inception_5a", weights_layout, 256, std::make_tuple(160U, 320U), + std::make_tuple(32U, 128U), 128U) + .set_name("inception_5a/concat"); + graph << get_inception_node(data_path, "inception_5b", weights_layout, 384, std::make_tuple(192U, 384U), + std::make_tuple(48U, 128U), 128U) + .set_name("inception_5b/concat"); + graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 7, operation_layout, + PadStrideInfo(1, 1, 0, 0, 
DimensionRoundingType::CEIL))) + .set_name("pool5/7x7_s1") << FullyConnectedLayer( - 1000U, - get_weights_accessor(data_path, "/cnn_data/googlenet_model/loss3/loss3_classifier_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/googlenet_model/loss3/loss3_classifier_b.npy")) - .set_name("loss3/classifier") - << SoftmaxLayer().set_name("prob") - << OutputLayer(get_output_accessor(common_params, 5)); + 1000U, + get_weights_accessor(data_path, "/cnn_data/googlenet_model/loss3/loss3_classifier_w.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/googlenet_model/loss3/loss3_classifier_b.npy")) + .set_name("loss3/classifier") + << SoftmaxLayer().set_name("prob") << OutputLayer(get_output_accessor(common_params, 5)); // Finalize graph GraphConfig config; @@ -130,6 +166,7 @@ public: config.use_tuner = common_params.enable_tuner; config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; graph.finalize(common_params.target, config); @@ -147,63 +184,63 @@ private: CommonGraphParams common_params; Stream graph; - ConcatLayer get_inception_node(const std::string &data_path, std::string &&param_path, DataLayout weights_layout, - unsigned int a_filt, + ConcatLayer get_inception_node(const std::string &data_path, + std::string &&param_path, + DataLayout weights_layout, + unsigned int a_filt, std::tuple<unsigned int, unsigned int> b_filters, std::tuple<unsigned int, unsigned int> c_filters, - unsigned int d_filt) + unsigned int d_filt) { std::string total_path = "/cnn_data/googlenet_model/" + param_path + "/" + param_path + "_"; SubStream i_a(graph); - i_a << ConvolutionLayer( - 1U, 1U, a_filt, - get_weights_accessor(data_path, total_path + "1x1_w.npy", weights_layout), - get_weights_accessor(data_path, total_path + "1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/relu_1x1"); + i_a << ConvolutionLayer(1U, 1U, a_filt, + get_weights_accessor(data_path, total_path + "1x1_w.npy", weights_layout), + get_weights_accessor(data_path, total_path + "1x1_b.npy"), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/relu_1x1"); SubStream i_b(graph); - i_b << ConvolutionLayer( - 1U, 1U, std::get<0>(b_filters), - get_weights_accessor(data_path, total_path + "3x3_reduce_w.npy", weights_layout), - get_weights_accessor(data_path, total_path + "3x3_reduce_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/3x3_reduce") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/relu_3x3_reduce") - << ConvolutionLayer( - 3U, 3U, std::get<1>(b_filters), - get_weights_accessor(data_path, total_path + "3x3_w.npy", weights_layout), - get_weights_accessor(data_path, total_path + "3x3_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name(param_path + "/3x3") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/relu_3x3"); + i_b << ConvolutionLayer(1U, 1U, std::get<0>(b_filters), + get_weights_accessor(data_path, total_path + "3x3_reduce_w.npy", weights_layout), + get_weights_accessor(data_path, total_path + "3x3_reduce_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/3x3_reduce") + <<
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/relu_3x3_reduce") + << ConvolutionLayer(3U, 3U, std::get<1>(b_filters), + get_weights_accessor(data_path, total_path + "3x3_w.npy", weights_layout), + get_weights_accessor(data_path, total_path + "3x3_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name(param_path + "/3x3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/relu_3x3"); SubStream i_c(graph); - i_c << ConvolutionLayer( - 1U, 1U, std::get<0>(c_filters), - get_weights_accessor(data_path, total_path + "5x5_reduce_w.npy", weights_layout), - get_weights_accessor(data_path, total_path + "5x5_reduce_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/5x5_reduce") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/relu_5x5_reduce") - << ConvolutionLayer( - 5U, 5U, std::get<1>(c_filters), - get_weights_accessor(data_path, total_path + "5x5_w.npy", weights_layout), - get_weights_accessor(data_path, total_path + "5x5_b.npy"), - PadStrideInfo(1, 1, 2, 2)) - .set_name(param_path + "/5x5") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/relu_5x5"); + i_c << ConvolutionLayer(1U, 1U, std::get<0>(c_filters), + get_weights_accessor(data_path, total_path + "5x5_reduce_w.npy", weights_layout), + get_weights_accessor(data_path, total_path + "5x5_reduce_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/5x5_reduce") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/relu_5x5_reduce") + << ConvolutionLayer(5U, 5U, std::get<1>(c_filters), + get_weights_accessor(data_path, total_path + "5x5_w.npy", weights_layout), + get_weights_accessor(data_path, total_path + "5x5_b.npy"), PadStrideInfo(1, 1, 2, 2)) + .set_name(param_path + "/5x5") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/relu_5x5"); SubStream i_d(graph); - i_d << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL))).set_name(param_path + "/pool") + i_d << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, + PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL))) + .set_name(param_path + "/pool") << ConvolutionLayer( - 1U, 1U, d_filt, - get_weights_accessor(data_path, total_path + "pool_proj_w.npy", weights_layout), - get_weights_accessor(data_path, total_path + "pool_proj_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/pool_proj") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/relu_pool_proj"); + 1U, 1U, d_filt, get_weights_accessor(data_path, total_path + "pool_proj_w.npy", weights_layout), + get_weights_accessor(data_path, total_path + "pool_proj_b.npy"), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/pool_proj") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/relu_pool_proj"); return ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)); } diff --git a/examples/graph_inception_resnet_v1.cpp b/examples/graph_inception_resnet_v1.cpp index ea9bf8f5a9..a54a0f7806 100644 --- a/examples/graph_inception_resnet_v1.cpp +++ b/examples/graph_inception_resnet_v1.cpp 
@@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. */ #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -38,7 +39,12 @@ class InceptionResNetV1Example final : public Example { public: InceptionResNetV1Example() - : cmd_parser(), common_opts(cmd_parser), common_params(), model_input_width(nullptr), model_input_height(nullptr), graph(0, "InceptionResNetV1") + : cmd_parser(), + common_opts(cmd_parser), + common_params(), + model_input_width(nullptr), + model_input_height(nullptr), + graph(0, "InceptionResNetV1") { model_input_width = cmd_parser.add_option<SimpleOption<unsigned int>>("image-width", 512); model_input_height = cmd_parser.add_option<SimpleOption<unsigned int>>("image-height", 512); @@ -47,7 +53,7 @@ public: model_input_width->set_help("Input image width."); model_input_height->set_help("Input image height."); } - InceptionResNetV1Example(const InceptionResNetV1Example &) = delete; + InceptionResNetV1Example(const InceptionResNetV1Example &) = delete; InceptionResNetV1Example &operator=(const InceptionResNetV1Example &) = delete; ~InceptionResNetV1Example() override = default; bool do_setup(int argc, char **argv) override @@ -60,7 +66,7 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; @@ -70,13 +76,14 @@ public: const unsigned int image_height = model_input_height->value(); // Set default layout if needed - if(!common_opts.data_layout->is_set() && common_params.target == Target::NEON) + if (!common_opts.data_layout->is_set() && common_params.target == Target::NEON) { common_params.data_layout = DataLayout::NCHW; } // Checks - ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "QASYMM8 not supported for this graph"); + ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), + "QASYMM8 not supported for this graph"); // Print parameter values std::cout << common_params << std::endl; @@ -86,105 +93,108 @@ public: // Create model path std::string data_path = common_params.data_path; std::string model_path = "/cnn_data/inception_resnet_v1_model/"; - if(!data_path.empty()) + if (!data_path.empty()) { data_path += model_path; } // Create a preprocessor object - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<TFPreproccessor>(0.f, 1.f); + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<TFPreproccessor>(0.f, 1.f); // Create input descriptor const auto operation_layout = common_params.data_layout; - const TensorShape tensor_shape = permute_shape(TensorShape(image_width, image_height, 3U, 1U), DataLayout::NCHW, operation_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); + const TensorShape tensor_shape = permute_shape( + TensorShape(image_width, image_height, 3U, common_params.batches), DataLayout::NCHW, operation_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NCHW; - graph << common_params.target - << common_params.fast_math_hint + graph << 
common_params.target << common_params.fast_math_hint << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), false)) // Conv2d_1a_3x3 - << ConvolutionLayer(3U, 3U, 32U, - get_weights_accessor(data_path, "Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 0)) - .set_name("Conv2d_1a_3x3/convolution") + << ConvolutionLayer( + 3U, 3U, 32U, get_weights_accessor(data_path, "Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Conv2d_1a_3x3/convolution") << BatchNormalizationLayer(get_weights_accessor(data_path, "Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, "Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "Conv2d_1a_3x3_BatchNorm_beta.npy"), batch_norm_epsilon) - .set_name("Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_1a_3x3/Relu") + .set_name("Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_1a_3x3/Relu") // Conv2d_2a_3x3 - << ConvolutionLayer(3U, 3U, 32U, - get_weights_accessor(data_path, "Conv2d_2a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Conv2d_2a_3x3/convolution") + << ConvolutionLayer( + 3U, 3U, 32U, get_weights_accessor(data_path, "Conv2d_2a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Conv2d_2a_3x3/convolution") << BatchNormalizationLayer(get_weights_accessor(data_path, "Conv2d_2a_3x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, "Conv2d_2a_3x3_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "Conv2d_2a_3x3_BatchNorm_beta.npy"), batch_norm_epsilon) - .set_name("Conv2d_2a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_2a_3x3/Relu") + .set_name("Conv2d_2a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_2a_3x3/Relu") // Conv2d_2b_3x3 - << ConvolutionLayer(3U, 3U, 64U, - get_weights_accessor(data_path, "Conv2d_2b_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("Conv2d_2b_3x3/convolution") + << ConvolutionLayer( + 3U, 3U, 64U, get_weights_accessor(data_path, "Conv2d_2b_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("Conv2d_2b_3x3/convolution") << BatchNormalizationLayer(get_weights_accessor(data_path, "Conv2d_2b_3x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, "Conv2d_2b_3x3_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "Conv2d_2b_3x3_BatchNorm_beta.npy"), batch_norm_epsilon) - .set_name("Conv2d_2b_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_2b_3x3/Relu") + .set_name("Conv2d_2b_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + 
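+ // Note for the MaxPool_3a_3x3 pooling below: the trailing 'true' passed to
+ // PoolingLayerInfo is its exclude_padding flag (descriptive note only).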
.set_name("Conv2d_2b_3x3/Relu") // MaxPool_3a_3x3 - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true)).set_name("MaxPool_3a_3x3/MaxPool") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true)) + .set_name("MaxPool_3a_3x3/MaxPool") // Conv2d_3b_1x1 - << ConvolutionLayer(1U, 1U, 80U, - get_weights_accessor(data_path, "Conv2d_3b_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Conv2d_3b_1x1/convolution") + << ConvolutionLayer( + 1U, 1U, 80U, get_weights_accessor(data_path, "Conv2d_3b_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Conv2d_3b_1x1/convolution") << BatchNormalizationLayer(get_weights_accessor(data_path, "Conv2d_3b_1x1_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, "Conv2d_3b_1x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "Conv2d_3b_1x1_BatchNorm_beta.npy"), batch_norm_epsilon) - .set_name("Conv2d_3b_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_3b_1x1/Relu") + .set_name("Conv2d_3b_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_3b_1x1/Relu") // Conv2d_4a_3x3 - << ConvolutionLayer(3U, 3U, 192U, - get_weights_accessor(data_path, "Conv2d_4a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Conv2d_4a_3x3/convolution") + << ConvolutionLayer( + 3U, 3U, 192U, get_weights_accessor(data_path, "Conv2d_4a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Conv2d_4a_3x3/convolution") << BatchNormalizationLayer(get_weights_accessor(data_path, "Conv2d_4a_3x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, "Conv2d_4a_3x3_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "Conv2d_4a_3x3_BatchNorm_beta.npy"), batch_norm_epsilon) - .set_name("Conv2d_4a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_4a_3x3/Relu") + .set_name("Conv2d_4a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_4a_3x3/Relu") // Conv2d_4b_3x3 - << ConvolutionLayer(3U, 3U, 256U, - get_weights_accessor(data_path, "Conv2d_4b_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 0)) - .set_name("Conv2d_4a_3x3/convolution") + << ConvolutionLayer( + 3U, 3U, 256U, get_weights_accessor(data_path, "Conv2d_4b_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Conv2d_4a_3x3/convolution") << BatchNormalizationLayer(get_weights_accessor(data_path, "Conv2d_4b_3x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, "Conv2d_4b_3x3_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "Conv2d_4b_3x3_BatchNorm_beta.npy"), batch_norm_epsilon) - .set_name("Conv2d_4b_3x3/BatchNorm") - << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_4b_3x3/Relu"); + .set_name("Conv2d_4b_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_4b_3x3/Relu"); // 5 x Inception-resnet-A block35_repeat(data_path, weights_layout, 5); @@ -202,12 +212,10 @@ public: // Logits tail graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, operation_layout)).set_name("Logits/AvgPool_1a_8x8") << FlattenLayer().set_name("Logits/Flatten") - << FullyConnectedLayer( - 128U, - get_weights_accessor(data_path, "Logits_Logits_weights.npy", weights_layout), - get_weights_accessor(data_path, "Logits_Logits_biases.npy")) - .set_name("Logits/Logits") - << OutputLayer(arm_compute::support::cpp14::make_unique<DummyAccessor>(0)); + << FullyConnectedLayer(128U, get_weights_accessor(data_path, "Logits_Logits_weights.npy", weights_layout), + get_weights_accessor(data_path, "Logits_Logits_biases.npy")) + .set_name("Logits/Logits") + << OutputLayer(std::make_unique<DummyAccessor>(0)); // Finalize graph GraphConfig config; @@ -215,6 +223,7 @@ public: config.use_tuner = common_params.enable_tuner; config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; graph.finalize(common_params.target, config); @@ -230,14 +239,14 @@ private: CommandLineParser cmd_parser; CommonGraphOptions common_opts; CommonGraphParams common_params; - SimpleOption<unsigned int> *model_input_width{ nullptr }; - SimpleOption<unsigned int> *model_input_height{ nullptr }; + SimpleOption<unsigned int> *model_input_width{nullptr}; + SimpleOption<unsigned int> *model_input_height{nullptr}; Stream graph; private: void block35_repeat(const std::string &data_path, DataLayout weights_layout, unsigned int num_blocks) { - for(unsigned int i = 0; i < num_blocks; ++i) + for (unsigned int i = 0; i < num_blocks; ++i) { std::stringstream unit_path_ss; unit_path_ss << "Repeat_block35_" << (i + 1) << "_"; @@ -253,102 +262,128 @@ private: // Branch 0 SubStream i_la(i_l); - i_la << ConvolutionLayer(1U, 1U, 32U, - get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Branch_0/Conv2d_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name(unit_name + "Branch_0/Conv2d_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_0/Conv2d_1x1/Relu"); + i_la << ConvolutionLayer( + 1U, 1U, 32U, + get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "Branch_0/Conv2d_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + 
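+ // BatchNormalizationLayer accessor order: moving mean, moving variance,
+ // gamma, beta, epsilon. get_random_accessor(1.f, 1.f) supplies a constant
+ // 1.0 gamma, presumably because the checkpoint stores none (descriptive note).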
get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name(unit_name + "Branch_0/Conv2d_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_0/Conv2d_1x1/Relu"); // Branch 1 SubStream i_lb(i_l); i_lb << ConvolutionLayer(1U, 1U, 32U, - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_1/Conv2d_0a_1x1/Relu") + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/Relu") << ConvolutionLayer(3U, 3U, 32U, - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_3x3_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_3x3_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) - .set_name(unit_name + "Branch_1/Conv2d_0b_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name(unit_name + "Branch_1/Conv2d_0b_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_1/Conv2d_0b_3x3/Relu"); + .set_name(unit_name + "Branch_1/Conv2d_0b_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name(unit_name + "Branch_1/Conv2d_0b_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + 
"Branch_1/Conv2d_0b_3x3/Relu"); // Branch 2 SubStream i_lc(i_l); i_lc << ConvolutionLayer(1U, 1U, 32U, - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0a_1x1_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Branch_2/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name(unit_name + "Branch_2/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_2/Conv2d_0a_1x1/Relu") + .set_name(unit_name + "Branch_2/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name(unit_name + "Branch_2/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_2/Conv2d_0a_1x1/Relu") << ConvolutionLayer(3U, 3U, 32U, - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0b_3x3_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) - .set_name(unit_name + "Branch_2/Conv2d_0b_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name(unit_name + "Branch_2/Conv2d_0b_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_2/Conv2d_0b_3x3/Relu") + .set_name(unit_name + "Branch_2/Conv2d_0b_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name(unit_name + "Branch_2/Conv2d_0b_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_2/Conv2d_0b_3x3/Relu") << ConvolutionLayer(3U, 3U, 32U, - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0c_3x3_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0c_3x3_weights.npy", + weights_layout), 
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) - .set_name(unit_name + "Branch_2/Conv2d_0c_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name(unit_name + "Branch_2/Conv2d_0c_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_2/Conv2d_0c_3x3/Relu"); + .set_name(unit_name + "Branch_2/Conv2d_0c_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name(unit_name + "Branch_2/Conv2d_0c_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_2/Conv2d_0c_3x3/Relu"); // Concatenate i_l << ConcatLayer(std::move(i_la), std::move(i_lb), std::move(i_lc)).set_name(unit_name + "concat") - << ConvolutionLayer(1U, 1U, 256U, - get_weights_accessor(data_path, unit_path + "Conv2d_1x1_weights.npy", weights_layout), - get_weights_accessor(data_path, unit_path + "Conv2d_1x1_biases.npy", weights_layout), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Conv2d_1x1/convolution") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 0.17f, 0.f)).set_name(unit_name + "mul"); + << ConvolutionLayer( + 1U, 1U, 256U, + get_weights_accessor(data_path, unit_path + "Conv2d_1x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Conv2d_1x1_biases.npy", weights_layout), + PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "Conv2d_1x1/convolution") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 0.17f, 0.f)) + .set_name(unit_name + "mul"); graph << EltwiseLayer(std::move(i_l), std::move(i_r), EltwiseOperation::Add).set_name(unit_name + "add") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Relu"); + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Relu"); } } void block17_repeat(const std::string &data_path, DataLayout weights_layout, unsigned int num_blocks) { - for(unsigned int i = 0; i < num_blocks; ++i) + for (unsigned int i = 0; i < num_blocks; ++i) { std::stringstream unit_path_ss; unit_path_ss << "Repeat_1_block17_" << (i + 1) << "_"; @@ -364,79 +399,101 @@ private: // Branch 0 SubStream i_la(i_l); - i_la << ConvolutionLayer(1U, 1U, 128U, - get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Branch_0/Conv2d_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + 
"Branch_0_Conv2d_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name(unit_name + "Branch_0/Conv2d_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_0/Conv2d_1x1/Relu"); + i_la << ConvolutionLayer( + 1U, 1U, 128U, + get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "Branch_0/Conv2d_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name(unit_name + "Branch_0/Conv2d_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_0/Conv2d_1x1/Relu"); // Branch 1 SubStream i_lb(i_l); i_lb << ConvolutionLayer(1U, 1U, 128U, - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_1/Conv2d_0a_1x1/Relu") + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/Relu") << ConvolutionLayer(7U, 1U, 128U, - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x7_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0)) - .set_name(unit_name + "Branch_1/Conv2d_0b_1x7/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"), - 
get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name(unit_name + "Branch_1/Conv2d_0b_1x7/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_1/Conv2d_0b_1x7/Relu") + .set_name(unit_name + "Branch_1/Conv2d_0b_1x7/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name(unit_name + "Branch_1/Conv2d_0b_1x7/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_1/Conv2d_0b_1x7/Relu") << ConvolutionLayer(1U, 7U, 128U, - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_7x1_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3)) - .set_name(unit_name + "Branch_1/Conv2d_0c_7x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name(unit_name + "Branch_1/Conv2d_0c_7x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_1/Conv2d_0c_7x1/Relu"); + .set_name(unit_name + "Branch_1/Conv2d_0c_7x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name(unit_name + "Branch_1/Conv2d_0c_7x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_1/Conv2d_0c_7x1/Relu"); // Concatenate i_l << ConcatLayer(std::move(i_la), std::move(i_lb)).set_name(unit_name + "concat") - << ConvolutionLayer(1U, 1U, 896U, - get_weights_accessor(data_path, unit_path + "Conv2d_1x1_weights.npy", weights_layout), - get_weights_accessor(data_path, unit_path + "Conv2d_1x1_biases.npy", weights_layout), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Conv2d_1x1/convolution") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 0.10f, 0.f)).set_name(unit_name + "mul"); + << ConvolutionLayer( + 1U, 1U, 896U, + get_weights_accessor(data_path, unit_path + "Conv2d_1x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Conv2d_1x1_biases.npy", weights_layout), + PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "Conv2d_1x1/convolution") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 0.10f, 0.f)) + .set_name(unit_name + 
"mul"); graph << EltwiseLayer(std::move(i_l), std::move(i_r), EltwiseOperation::Add).set_name(unit_name + "add") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Relu"); + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Relu"); } } - void block8_repeat(const std::string &data_path, DataLayout weights_layout, unsigned int num_blocks, float scale, bool has_activation) + void block8_repeat(const std::string &data_path, + DataLayout weights_layout, + unsigned int num_blocks, + float scale, + bool has_activation) { - for(unsigned int i = 0; i < num_blocks; ++i) + for (unsigned int i = 0; i < num_blocks; ++i) { std::stringstream unit_path_ss; std::stringstream unit_name_ss; - if(num_blocks != 1) + if (num_blocks != 1) { unit_path_ss << "Repeat_2_block8_" << (i + 1) << "_"; unit_name_ss << "Repeat_2/block8_" << (i + 1) << "/"; @@ -456,79 +513,97 @@ private: // Branch 0 SubStream i_la(i_l); - i_la << ConvolutionLayer(1U, 1U, 192U, - get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Branch_0/Conv2d_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name(unit_name + "Branch_0/Conv2d_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_0/Conv2d_1x1/Relu"); + i_la << ConvolutionLayer( + 1U, 1U, 192U, + get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "Branch_0/Conv2d_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name(unit_name + "Branch_0/Conv2d_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_0/Conv2d_1x1/Relu"); // Branch 1 SubStream i_lb(i_l); i_lb << ConvolutionLayer(1U, 1U, 192U, - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - batch_norm_epsilon) - 
.set_name(unit_name + "Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_1/Conv2d_0a_1x1/Relu") + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/Relu") << ConvolutionLayer(3U, 1U, 192U, - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x3_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x3_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 0)) - .set_name(unit_name + "Branch_1/Conv2d_0b_1x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name(unit_name + "Branch_1/Conv2d_0b_1x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_1/Conv2d_0b_1x3/Relu") + .set_name(unit_name + "Branch_1/Conv2d_0b_1x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name(unit_name + "Branch_1/Conv2d_0b_1x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_1/Conv2d_0b_1x3/Relu") << ConvolutionLayer(1U, 3U, 192U, - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_3x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_3x1_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 1)) - .set_name(unit_name + "Branch_1/Conv2d_0c_3x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name(unit_name + "Branch_1/Conv2d_0c_3x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_1/Conv2d_0c_3x1/Relu"); + .set_name(unit_name + "Branch_1/Conv2d_0c_3x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, 
unit_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name(unit_name + "Branch_1/Conv2d_0c_3x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_1/Conv2d_0c_3x1/Relu"); // Concatenate i_l << ConcatLayer(std::move(i_la), std::move(i_lb)).set_name(unit_name + "concat") - << ConvolutionLayer(1U, 1U, 1792U, - get_weights_accessor(data_path, unit_path + "Conv2d_1x1_weights.npy", weights_layout), - get_weights_accessor(data_path, unit_path + "Conv2d_1x1_biases.npy", weights_layout), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Conv2d_1x1/convolution"); + << ConvolutionLayer( + 1U, 1U, 1792U, + get_weights_accessor(data_path, unit_path + "Conv2d_1x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Conv2d_1x1_biases.npy", weights_layout), + PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "Conv2d_1x1/convolution"); // Scale result - if(scale != 1.f) + if (scale != 1.f) { - i_l << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, scale, 0.f)).set_name(unit_name + "mul"); + i_l << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, scale, 0.f)) + .set_name(unit_name + "mul"); } // Residual add graph << EltwiseLayer(std::move(i_l), std::move(i_r), EltwiseOperation::Add).set_name(unit_name + "add"); // Apply activation if needed - if(has_activation) + if (has_activation) { - graph << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Relu"); + graph << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Relu"); } } } @@ -537,61 +612,71 @@ private: { // Branch 0 SubStream i_a(graph); - i_a << ConvolutionLayer(3U, 3U, 384U, - get_weights_accessor(data_path, "Mixed_6a_Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 0)) - .set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_6a_Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_6a_Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_6a_Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/Relu"); + i_a << ConvolutionLayer( + 3U, 3U, 384U, + get_weights_accessor(data_path, "Mixed_6a_Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_6a_Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_6a_Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, 
"Mixed_6a_Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/Relu"); // Branch 1 SubStream i_b(graph); - i_b << ConvolutionLayer(1U, 1U, 192U, - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(3U, 3U, 192U, - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0b_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0b_3x3_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/Relu") - << ConvolutionLayer(3U, 3U, 256U, - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 0)) - .set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/Relu"); + i_b << ConvolutionLayer( + 1U, 1U, 192U, + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), + batch_norm_epsilon) + 
.set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 3U, 3U, 192U, + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0b_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0b_3x3_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/Relu") + << ConvolutionLayer( + 3U, 3U, 256U, + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/Relu"); // Branch 2 SubStream i_c(graph); - i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, PadStrideInfo(2, 2, 0, 0), true)).set_name("Mixed_6a/Branch_2/MaxPool_1a_3x3"); + i_c << PoolingLayer( + PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, PadStrideInfo(2, 2, 0, 0), true)) + .set_name("Mixed_6a/Branch_2/MaxPool_1a_3x3"); // Concatenate graph << ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c)).set_name("Mixed_6a/concat"); @@ -601,103 +686,120 @@ private: { // Branch 0 SubStream i_a(graph); - i_a << ConvolutionLayer(1U, 1U, 256U, - get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_7a/Branch_0/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name("Mixed_7a/Branch_0/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_0/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(3U, 3U, 384U, - get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 0)) - 
.set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/Relu"); + i_a << ConvolutionLayer( + 1U, 1U, 256U, + get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Mixed_7a/Branch_0/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name("Mixed_7a/Branch_0/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_0/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 3U, 3U, 384U, + get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/Relu"); // Branch 1 SubStream i_b(graph); - i_b << ConvolutionLayer(1U, 1U, 256U, - get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(3U, 3U, 256U, - get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 0)) - .set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/convolution") - << 
BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/Relu"); + i_b << ConvolutionLayer( + 1U, 1U, 256U, + get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 3U, 3U, 256U, + get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/Relu"); // Branch 2 SubStream i_c(graph); - i_c << ConvolutionLayer(1U, 1U, 256U, - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_7a/Branch_2/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name("Mixed_7a/Branch_2/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_2/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(3U, 3U, 256U, - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("Mixed_7a/Branch_2/Conv2d_0b_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, 
"Mixed_7a_Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name("Mixed_7a/Branch_2/Conv2d_0b_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_2/Conv2d_0b_3x3/Relu") - << ConvolutionLayer(3U, 3U, 256U, - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 0)) - .set_name("Mixed_7a/Branch_2/Conv2d_1a_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_1a_3x3_BatchNorm_beta.npy"), - batch_norm_epsilon) - .set_name("Mixed_7a/Branch_2/Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_2/Conv2d_1a_3x3/Relu"); + i_c << ConvolutionLayer( + 1U, 1U, 256U, + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Mixed_7a/Branch_2/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name("Mixed_7a/Branch_2/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_2/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 3U, 3U, 256U, + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("Mixed_7a/Branch_2/Conv2d_0b_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name("Mixed_7a/Branch_2/Conv2d_0b_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_2/Conv2d_0b_3x3/Relu") + << ConvolutionLayer( + 3U, 3U, 256U, + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Mixed_7a/Branch_2/Conv2d_1a_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, 
"Mixed_7a_Branch_2_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_1a_3x3_BatchNorm_beta.npy"), + batch_norm_epsilon) + .set_name("Mixed_7a/Branch_2/Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_2/Conv2d_1a_3x3/Relu"); // Branch 3 SubStream i_d(graph); - i_d << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, PadStrideInfo(2, 2, 0, 0), true)).set_name("Mixed_7a/Branch_3/MaxPool_1a_3x3"); + i_d << PoolingLayer( + PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, PadStrideInfo(2, 2, 0, 0), true)) + .set_name("Mixed_7a/Branch_3/MaxPool_1a_3x3"); // Concatenate - graph << ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)).set_name("Mixed_7a/concat"); + graph + << ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)).set_name("Mixed_7a/concat"); } }; diff --git a/examples/graph_inception_resnet_v2.cpp b/examples/graph_inception_resnet_v2.cpp index d2f6e1d576..43e31ee14b 100644 --- a/examples/graph_inception_resnet_v2.cpp +++ b/examples/graph_inception_resnet_v2.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. */ #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -35,8 +36,7 @@ using namespace arm_compute::graph_utils; class InceptionResNetV2Example final : public Example { public: - InceptionResNetV2Example() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "InceptionResNetV2") + InceptionResNetV2Example() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "InceptionResNetV2") { } bool do_setup(int argc, char **argv) override @@ -49,20 +49,21 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; } // Set default layout if needed - if(!common_opts.data_layout->is_set() && common_params.target == Target::NEON) + if (!common_opts.data_layout->is_set() && common_params.target == Target::NEON) { common_params.data_layout = DataLayout::NCHW; } // Checks - ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "QASYMM8 not supported for this graph"); + ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), + "QASYMM8 not supported for this graph"); // Print parameter values std::cout << common_params << std::endl; @@ -70,94 +71,98 @@ public: // Create model path std::string data_path = common_params.data_path; std::string model_path = "/cnn_data/inception_resnet_v2_model/"; - if(!data_path.empty()) + if (!data_path.empty()) { data_path += model_path; } // Create a preprocessor object - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<TFPreproccessor>(0.f, 1.f); + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<TFPreproccessor>(0.f, 1.f); // Create input descriptor const auto operation_layout = common_params.data_layout; - const TensorShape tensor_shape = permute_shape(TensorShape(299U, 299U, 3U, 1U), DataLayout::NCHW, operation_layout); - TensorDescriptor input_descriptor = 
TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(299U, 299U, 3U, common_params.batches), DataLayout::NCHW, operation_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NCHW; - graph << common_params.target - << common_params.fast_math_hint + graph << common_params.target << common_params.fast_math_hint << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), false)) // Conv2d_1a_3x3 - << ConvolutionLayer(3U, 3U, 32U, - get_weights_accessor(data_path, "Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 0)) - .set_name("Conv2d_1a_3x3/convolution") + << ConvolutionLayer( + 3U, 3U, 32U, get_weights_accessor(data_path, "Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Conv2d_1a_3x3/convolution") << BatchNormalizationLayer(get_weights_accessor(data_path, "Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, "Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "Conv2d_1a_3x3_BatchNorm_beta.npy"), 0.0010000000474974513f) - .set_name("Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_1a_3x3/Relu") + .set_name("Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_1a_3x3/Relu") // Conv2d_2a_3x3 - << ConvolutionLayer(3U, 3U, 32U, - get_weights_accessor(data_path, "Conv2d_2a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Conv2d_2a_3x3/convolution") + << ConvolutionLayer( + 3U, 3U, 32U, get_weights_accessor(data_path, "Conv2d_2a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Conv2d_2a_3x3/convolution") << BatchNormalizationLayer(get_weights_accessor(data_path, "Conv2d_2a_3x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, "Conv2d_2a_3x3_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "Conv2d_2a_3x3_BatchNorm_beta.npy"), 0.0010000000474974513f) - .set_name("Conv2d_2a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_2a_3x3/Relu") + .set_name("Conv2d_2a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_2a_3x3/Relu") // Conv2d_2b_3x3 - << ConvolutionLayer(3U, 3U, 64U, - get_weights_accessor(data_path, "Conv2d_2b_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("Conv2d_2b_3x3/convolution") + << ConvolutionLayer( + 3U, 3U, 64U, get_weights_accessor(data_path, "Conv2d_2b_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("Conv2d_2b_3x3/convolution") << BatchNormalizationLayer(get_weights_accessor(data_path, 
"Conv2d_2b_3x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, "Conv2d_2b_3x3_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "Conv2d_2b_3x3_BatchNorm_beta.npy"), 0.0010000000474974513f) - .set_name("Conv2d_2b_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_2b_3x3/Relu") + .set_name("Conv2d_2b_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_2b_3x3/Relu") // MaxPool_3a_3x3 - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true)).set_name("MaxPool_3a_3x3/MaxPool") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true)) + .set_name("MaxPool_3a_3x3/MaxPool") // Conv2d_3b_1x1 - << ConvolutionLayer(1U, 1U, 80U, - get_weights_accessor(data_path, "Conv2d_3b_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Conv2d_3b_1x1/convolution") + << ConvolutionLayer( + 1U, 1U, 80U, get_weights_accessor(data_path, "Conv2d_3b_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Conv2d_3b_1x1/convolution") << BatchNormalizationLayer(get_weights_accessor(data_path, "Conv2d_3b_1x1_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, "Conv2d_3b_1x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "Conv2d_3b_1x1_BatchNorm_beta.npy"), 0.0010000000474974513f) - .set_name("Conv2d_3b_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_3b_1x1/Relu") + .set_name("Conv2d_3b_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_3b_1x1/Relu") // Conv2d_4a_3x3 - << ConvolutionLayer(3U, 3U, 192U, - get_weights_accessor(data_path, "Conv2d_4a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Conv2d_4a_3x3/convolution") + << ConvolutionLayer( + 3U, 3U, 192U, get_weights_accessor(data_path, "Conv2d_4a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Conv2d_4a_3x3/convolution") << BatchNormalizationLayer(get_weights_accessor(data_path, "Conv2d_4a_3x3_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, "Conv2d_4a_3x3_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "Conv2d_4a_3x3_BatchNorm_beta.npy"), 0.0010000000474974513f) - .set_name("Conv2d_4a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_4a_3x3/Relu") + .set_name("Conv2d_4a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_4a_3x3/Relu") // MaxPool_5a_3x3 - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0), true)).set_name("MaxPool_5a_3x3/MaxPool"); + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0), true)) + .set_name("MaxPool_5a_3x3/MaxPool"); 
block_mixed_5b(data_path, weights_layout); block35_repeat(data_path, weights_layout, 10); @@ -168,27 +173,25 @@ public: block8_repeat(data_path, weights_layout, 1, 1.f, false); // Conv2d_7b_1x1 - graph << ConvolutionLayer(1U, 1U, 1536U, - get_weights_accessor(data_path, "Conv2d_7b_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Conv2d_7b_1x1/convolution") + graph << ConvolutionLayer( + 1U, 1U, 1536U, get_weights_accessor(data_path, "Conv2d_7b_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Conv2d_7b_1x1/convolution") << BatchNormalizationLayer(get_weights_accessor(data_path, "Conv2d_7b_1x1_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, "Conv2d_7b_1x1_BatchNorm_moving_variance.npy"), get_random_accessor(1.f, 1.f), get_weights_accessor(data_path, "Conv2d_7b_1x1_BatchNorm_beta.npy"), 0.0010000000474974513f) - .set_name("Conv2d_7b_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_7b_1x1/Relu") + .set_name("Conv2d_7b_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_7b_1x1/Relu") << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, operation_layout)).set_name("Logits/AvgPool_1a_8x8") << FlattenLayer().set_name("Logits/Flatten") - << FullyConnectedLayer( - 1001U, - get_weights_accessor(data_path, "Logits_Logits_weights.npy", weights_layout), - get_weights_accessor(data_path, "Logits_Logits_biases.npy")) - .set_name("Logits/Logits") - << SoftmaxLayer().set_name("Logits/Predictions") - << OutputLayer(get_output_accessor(common_params, 5)); + << FullyConnectedLayer(1001U, + get_weights_accessor(data_path, "Logits_Logits_weights.npy", weights_layout), + get_weights_accessor(data_path, "Logits_Logits_biases.npy")) + .set_name("Logits/Logits") + << SoftmaxLayer().set_name("Logits/Predictions") << OutputLayer(get_output_accessor(common_params, 5)); // Finalize graph GraphConfig config; @@ -196,6 +199,7 @@ public: config.use_tuner = common_params.enable_tuner; config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; graph.finalize(common_params.target, config); @@ -218,164 +222,191 @@ private: { // Branch 0 SubStream i_a(graph); - i_a << ConvolutionLayer(1U, 1U, 96U, - get_weights_accessor(data_path, "Mixed_5b_Branch_0_Conv2d_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_5b/Branch_0/Conv2d_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_5b_Branch_0_Conv2d_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_5b_Branch_0_Conv2d_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_5b_Branch_0_Conv2d_1x1_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name("Mixed_5b/Branch_0/Conv2d_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_5b/Branch_0/Conv2d_1x1/Relu"); + i_a << ConvolutionLayer( + 1U, 1U, 96U, + get_weights_accessor(data_path, "Mixed_5b_Branch_0_Conv2d_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + 
.set_name("Mixed_5b/Branch_0/Conv2d_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_5b_Branch_0_Conv2d_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_5b_Branch_0_Conv2d_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_5b_Branch_0_Conv2d_1x1_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_5b/Branch_0/Conv2d_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_5b/Branch_0/Conv2d_1x1/Relu"); // Branch 1 SubStream i_b(graph); - i_b << ConvolutionLayer(1U, 1U, 48U, - get_weights_accessor(data_path, "Mixed_5b_Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_5b/Branch_1/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_5b_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_5b_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_5b_Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name("Mixed_5b/Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_5b/Branch_1/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(5U, 5U, 64U, - get_weights_accessor(data_path, "Mixed_5b_Branch_1_Conv2d_0b_5x5_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 2, 2)) - .set_name("Mixed_5b/Branch_1/Conv2d_0b_5x5/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_5b_Branch_1_Conv2d_0b_5x5_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_5b_Branch_1_Conv2d_0b_5x5_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_5b_Branch_1_Conv2d_0b_5x5_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name("Mixed_5b/Branch_1/Conv2d_0b_5x5/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_5b/Branch_1/Conv2d_0b_5x5/Relu"); + i_b << ConvolutionLayer( + 1U, 1U, 48U, + get_weights_accessor(data_path, "Mixed_5b_Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Mixed_5b/Branch_1/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_5b_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_5b_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_5b_Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_5b/Branch_1/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_5b/Branch_1/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 5U, 5U, 64U, + get_weights_accessor(data_path, "Mixed_5b_Branch_1_Conv2d_0b_5x5_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 2, 2)) + .set_name("Mixed_5b/Branch_1/Conv2d_0b_5x5/convolution") + << BatchNormalizationLayer( + 
get_weights_accessor(data_path, "Mixed_5b_Branch_1_Conv2d_0b_5x5_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_5b_Branch_1_Conv2d_0b_5x5_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_5b_Branch_1_Conv2d_0b_5x5_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_5b/Branch_1/Conv2d_0b_5x5/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_5b/Branch_1/Conv2d_0b_5x5/Relu"); // Branch 2 SubStream i_c(graph); - i_c << ConvolutionLayer(1U, 1U, 64U, - get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_5b/Branch_2/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name("Mixed_5b/Branch_2/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_5b/Branch_2/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(3U, 3U, 96U, - get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("Mixed_5b/Branch_2/Conv2d_0b_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name("Mixed_5b/Branch_2/Conv2d_0b_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_5b/Branch_2/Conv2d_0b_3x3/Relu") - << ConvolutionLayer(3U, 3U, 96U, - get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0c_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("Mixed_5b/Branch_2/Conv2d_0c_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0c_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0c_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0c_3x3_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name("Mixed_5b/Branch_2/Conv2d_0c_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_5b/Branch_2/Conv2d_0c_3x3/Relu"); + i_c << ConvolutionLayer( + 1U, 1U, 64U, + get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Mixed_5b/Branch_2/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + 
get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_5b/Branch_2/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_5b/Branch_2/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 3U, 3U, 96U, + get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("Mixed_5b/Branch_2/Conv2d_0b_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_5b/Branch_2/Conv2d_0b_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_5b/Branch_2/Conv2d_0b_3x3/Relu") + << ConvolutionLayer( + 3U, 3U, 96U, + get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0c_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("Mixed_5b/Branch_2/Conv2d_0c_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0c_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0c_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_5b_Branch_2_Conv2d_0c_3x3_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_5b/Branch_2/Conv2d_0c_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_5b/Branch_2/Conv2d_0c_3x3/Relu"); // Branch 3 SubStream i_d(graph); - i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, common_params.data_layout, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)).set_name("Mixed_5b/Branch_3/AvgPool_0a_3x3") - << ConvolutionLayer(1U, 1U, 64U, - get_weights_accessor(data_path, "Mixed_5b_Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_5b/Branch_3/Conv2d_0b_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_5b_Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_5b_Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_5b_Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name("Mixed_5b/Branch_3/Conv2d_0b_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_5b/Branch_3/Conv2d_0b_1x1/Relu"); + i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, common_params.data_layout, + PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)) + .set_name("Mixed_5b/Branch_3/AvgPool_0a_3x3") + << ConvolutionLayer( + 1U, 1U, 64U, + get_weights_accessor(data_path, 
"Mixed_5b_Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Mixed_5b/Branch_3/Conv2d_0b_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_5b_Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_5b_Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_5b_Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_5b/Branch_3/Conv2d_0b_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_5b/Branch_3/Conv2d_0b_1x1/Relu"); // Concatenate - graph << ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)).set_name("Mixed_5a/concat"); + graph + << ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)).set_name("Mixed_5a/concat"); } void block_mixed_6a(const std::string &data_path, DataLayout weights_layout) { // Branch 0 SubStream i_a(graph); - i_a << ConvolutionLayer(3U, 3U, 384U, - get_weights_accessor(data_path, "Mixed_6a_Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 0)) - .set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_6a_Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_6a_Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_6a_Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/Relu"); + i_a << ConvolutionLayer( + 3U, 3U, 384U, + get_weights_accessor(data_path, "Mixed_6a_Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_6a_Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_6a_Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_6a_Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/Relu"); // Branch 1 SubStream i_b(graph); - i_b << ConvolutionLayer(1U, 1U, 256U, - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 
0.0010000000474974513f) - .set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(3U, 3U, 256U, - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0b_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0b_3x3_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/Relu") - << ConvolutionLayer(3U, 3U, 384U, - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 0)) - .set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/Relu"); + i_b << ConvolutionLayer( + 1U, 1U, 256U, + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 3U, 3U, 256U, + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0b_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_0b_3x3_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/BatchNorm") + << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/Relu") + << ConvolutionLayer( + 3U, 3U, 384U, + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_6a_Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/Relu"); // Branch 2 SubStream i_c(graph); - i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, PadStrideInfo(2, 2, 0, 0), true)).set_name("Mixed_6a/Branch_2/MaxPool_1a_3x3"); + i_c << PoolingLayer( + PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, PadStrideInfo(2, 2, 0, 0), true)) + .set_name("Mixed_6a/Branch_2/MaxPool_1a_3x3"); // Concatenate graph << ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c)).set_name("Mixed_6a/concat"); @@ -385,108 +416,125 @@ private: { // Branch 0 SubStream i_a(graph); - i_a << ConvolutionLayer(1U, 1U, 256U, - get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_7a/Branch_0/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name("Mixed_7a/Branch_0/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_0/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(3U, 3U, 384U, - get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 0)) - .set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/Relu"); + i_a << ConvolutionLayer( + 1U, 1U, 256U, + get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + 
.set_name("Mixed_7a/Branch_0/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_7a/Branch_0/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_0/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 3U, 3U, 384U, + get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_7a_Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/Relu"); // Branch 1 SubStream i_b(graph); - i_b << ConvolutionLayer(1U, 1U, 256U, - get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(3U, 3U, 288U, - get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 0)) - .set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/Relu"); + i_b << ConvolutionLayer( + 1U, 1U, 256U, + get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/convolution") + << 
BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 3U, 3U, 288U, + get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_7a_Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/Relu"); // Branch 2 SubStream i_c(graph); - i_c << ConvolutionLayer(1U, 1U, 256U, - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_7a/Branch_2/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name("Mixed_7a/Branch_2/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_2/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(3U, 3U, 288U, - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("Mixed_7a/Branch_2/Conv2d_0b_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name("Mixed_7a/Branch_2/Conv2d_0b_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_2/Conv2d_0b_3x3/Relu") - << ConvolutionLayer(3U, 3U, 320U, - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 0)) - .set_name("Mixed_7a/Branch_2/Conv2d_1a_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, 
"Mixed_7a_Branch_2_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name("Mixed_7a/Branch_2/Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_2/Conv2d_1a_3x3/Relu"); + i_c << ConvolutionLayer( + 1U, 1U, 256U, + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Mixed_7a/Branch_2/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_7a/Branch_2/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_2/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 3U, 3U, 288U, + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("Mixed_7a/Branch_2/Conv2d_0b_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_7a/Branch_2/Conv2d_0b_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_2/Conv2d_0b_3x3/Relu") + << ConvolutionLayer( + 3U, 3U, 320U, + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Mixed_7a/Branch_2/Conv2d_1a_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "Mixed_7a_Branch_2_Conv2d_1a_3x3_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name("Mixed_7a/Branch_2/Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_2/Conv2d_1a_3x3/Relu"); // Branch 3 SubStream i_d(graph); - i_d << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true)).set_name("Mixed_7a/Branch_3/MaxPool_1a_3x3"); + i_d << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true)) + .set_name("Mixed_7a/Branch_3/MaxPool_1a_3x3"); // Concatenate - graph << 
ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)).set_name("Mixed_7a/concat"); + graph + << ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)).set_name("Mixed_7a/concat"); } void block35_repeat(const std::string &data_path, DataLayout weights_layout, unsigned int num_blocks) { - for(unsigned int i = 0; i < num_blocks; ++i) + for (unsigned int i = 0; i < num_blocks; ++i) { std::stringstream unit_path_ss; unit_path_ss << "Repeat_block35_" << (i + 1) << "_"; @@ -502,102 +550,128 @@ private: // Branch 0 SubStream i_la(i_l); - i_la << ConvolutionLayer(1U, 1U, 32U, - get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Branch_0/Conv2d_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name(unit_name + "Branch_0/Conv2d_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_0/Conv2d_1x1/Relu"); + i_la << ConvolutionLayer( + 1U, 1U, 32U, + get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "Branch_0/Conv2d_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name(unit_name + "Branch_0/Conv2d_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_0/Conv2d_1x1/Relu"); // Branch 1 SubStream i_lb(i_l); i_lb << ConvolutionLayer(1U, 1U, 32U, - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_1/Conv2d_0a_1x1/Relu") + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + 
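The reduction and residual blocks in this file all use the same fork/join idiom: constructing a SubStream from the main Stream forks the graph at its current tail, each branch is grown independently, and a ConcatLayer joins the branch outputs (along the channel axis by default) back onto the main stream. The shapes line up because every branch reduces identically; assuming the standard 299x299 input, Mixed_7a sees 17x17 maps, and each branch, stride-2 convolution and max pool alike, produces (17 - 3)/2 + 1 = 8, so the four 8x8 outputs concatenate cleanly. A minimal sketch of the idiom, with illustrative layers that are not taken from the model:

    #include "arm_compute/graph.h"

    using namespace arm_compute;
    using namespace arm_compute::graph::frontend;

    void add_two_branch_block(Stream &graph, DataLayout layout)
    {
        SubStream branch_a(graph); // forks at the current tail of `graph`
        branch_a << PoolingLayer(
            PoolingLayerInfo(PoolingType::MAX, 3, layout, PadStrideInfo(1, 1, 1, 1)));

        SubStream branch_b(graph); // forks at the same point
        branch_b << ActivationLayer(
            ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));

        // Join the branches and continue building on the main stream.
        graph << ConcatLayer(std::move(branch_a), std::move(branch_b));
    }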
"Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/Relu") << ConvolutionLayer(3U, 3U, 32U, - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_3x3_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_3x3_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) - .set_name(unit_name + "Branch_1/Conv2d_0b_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name(unit_name + "Branch_1/Conv2d_0b_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_1/Conv2d_0b_3x3/Relu"); + .set_name(unit_name + "Branch_1/Conv2d_0b_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name(unit_name + "Branch_1/Conv2d_0b_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_1/Conv2d_0b_3x3/Relu"); // Branch 2 SubStream i_lc(i_l); i_lc << ConvolutionLayer(1U, 1U, 32U, - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0a_1x1_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Branch_2/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name(unit_name + "Branch_2/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_2/Conv2d_0a_1x1/Relu") + .set_name(unit_name + "Branch_2/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name(unit_name + "Branch_2/Conv2d_0a_1x1/BatchNorm") + << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_2/Conv2d_0a_1x1/Relu") << ConvolutionLayer(3U, 3U, 48U, - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0b_3x3_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) - .set_name(unit_name + "Branch_2/Conv2d_0b_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name(unit_name + "Branch_2/Conv2d_0b_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_2/Conv2d_0b_3x3/Relu") + .set_name(unit_name + "Branch_2/Conv2d_0b_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name(unit_name + "Branch_2/Conv2d_0b_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_2/Conv2d_0b_3x3/Relu") << ConvolutionLayer(3U, 3U, 64U, - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0c_3x3_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0c_3x3_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) - .set_name(unit_name + "Branch_2/Conv2d_0c_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name(unit_name + "Branch_2/Conv2d_0c_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_2/Conv2d_0c_3x3/Relu"); + .set_name(unit_name + "Branch_2/Conv2d_0c_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name(unit_name + "Branch_2/Conv2d_0c_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_2/Conv2d_0c_3x3/Relu"); // Concatenate i_l << ConcatLayer(std::move(i_la), std::move(i_lb), std::move(i_lc)).set_name(unit_name + "concat") - << ConvolutionLayer(1U, 1U, 320U, - 
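Branch_2 above reaches a 5x5 receptive field by stacking two 3x3 convolutions (0b and 0c) behind the 1x1 bottleneck, the standard Inception factorisation: the receptive field matches a direct 5x5 kernel (3 + 3 - 1 = 5), the per-pixel cost per input/output channel pair drops from 5 x 5 = 25 multiplies to 2 x 3 x 3 = 18, and an extra batch-norm/ReLU nonlinearity is gained in between.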
get_weights_accessor(data_path, unit_path + "Conv2d_1x1_weights.npy", weights_layout), - get_weights_accessor(data_path, unit_path + "Conv2d_1x1_biases.npy", weights_layout), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Conv2d_1x1/convolution") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 0.17f, 0.f)).set_name(unit_name + "mul"); + << ConvolutionLayer( + 1U, 1U, 320U, + get_weights_accessor(data_path, unit_path + "Conv2d_1x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Conv2d_1x1_biases.npy", weights_layout), + PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "Conv2d_1x1/convolution") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 0.17f, 0.f)) + .set_name(unit_name + "mul"); graph << EltwiseLayer(std::move(i_l), std::move(i_r), EltwiseOperation::Add).set_name(unit_name + "add") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Relu"); + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Relu"); } } void block17_repeat(const std::string &data_path, DataLayout weights_layout, unsigned int num_blocks) { - for(unsigned int i = 0; i < num_blocks; ++i) + for (unsigned int i = 0; i < num_blocks; ++i) { std::stringstream unit_path_ss; unit_path_ss << "Repeat_1_block17_" << (i + 1) << "_"; @@ -613,79 +687,101 @@ private: // Branch 0 SubStream i_la(i_l); - i_la << ConvolutionLayer(1U, 1U, 192U, - get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Branch_0/Conv2d_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name(unit_name + "Branch_0/Conv2d_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_0/Conv2d_1x1/Relu"); + i_la << ConvolutionLayer( + 1U, 1U, 192U, + get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "Branch_0/Conv2d_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name(unit_name + "Branch_0/Conv2d_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_0/Conv2d_1x1/Relu"); // Branch 1 SubStream i_lb(i_l); i_lb << ConvolutionLayer(1U, 1U, 128U, - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_weights.npy", + weights_layout), 
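The "mul" node that closes each block35 unit deserves a note: ACL's LINEAR activation computes f(x) = a*x + b, so ActivationLayerInfo(LINEAR, 0.17f, 0.f) multiplies the concatenated branch output by 0.17 before the EltwiseLayer adds it to the untouched shortcut i_r. This is the residual scaling from the Inception-ResNet paper; block17 below uses 0.10, and block8 takes the factor as a parameter. Schematically, each unit computes out = ReLU(x + scale * Conv1x1(Concat(branches(x)))), with scale = 0.17 here.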
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_1/Conv2d_0a_1x1/Relu") + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/Relu") << ConvolutionLayer(7U, 1U, 160U, - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x7_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0)) - .set_name(unit_name + "Branch_1/Conv2d_0b_1x7/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name(unit_name + "Branch_1/Conv2d_0b_1x7/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_1/Conv2d_0b_1x7/Relu") + .set_name(unit_name + "Branch_1/Conv2d_0b_1x7/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name(unit_name + "Branch_1/Conv2d_0b_1x7/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_1/Conv2d_0b_1x7/Relu") << ConvolutionLayer(1U, 7U, 192U, - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_7x1_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3)) - .set_name(unit_name + "Branch_1/Conv2d_0c_7x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"), - 
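Note the factorised spatial convolutions in block17: a 7-wide by 1-tall pass followed by a 1-wide by 7-tall pass. In the frontend API the first two ConvolutionLayer arguments are kernel width and height, and PadStrideInfo is (stride_x, stride_y, pad_x, pad_y), so PadStrideInfo(1, 1, 3, 0) pads three columns for the 7-wide kernel and PadStrideInfo(1, 1, 0, 3) pads three rows for the 7-tall one, each preserving the feature-map size. The arithmetic, assuming symmetric padding and FLOOR rounding:

    // out = (in + 2*pad - kernel) / stride + 1
    // e.g. a 17-wide map through the 7-wide kernel: (17 + 2*3 - 7) / 1 + 1 = 17
    unsigned int conv_out_dim(unsigned int in, unsigned int kernel,
                              unsigned int stride, unsigned int pad)
    {
        return (in + 2 * pad - kernel) / stride + 1;
    }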
get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name(unit_name + "Branch_1/Conv2d_0c_7x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_1/Conv2d_0c_7x1/Relu"); + .set_name(unit_name + "Branch_1/Conv2d_0c_7x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name(unit_name + "Branch_1/Conv2d_0c_7x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_1/Conv2d_0c_7x1/Relu"); // Concatenate i_l << ConcatLayer(std::move(i_la), std::move(i_lb)).set_name(unit_name + "concat") - << ConvolutionLayer(1U, 1U, 1088U, - get_weights_accessor(data_path, unit_path + "Conv2d_1x1_weights.npy", weights_layout), - get_weights_accessor(data_path, unit_path + "Conv2d_1x1_biases.npy", weights_layout), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Conv2d_1x1/convolution") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 0.10f, 0.f)).set_name(unit_name + "mul"); + << ConvolutionLayer( + 1U, 1U, 1088U, + get_weights_accessor(data_path, unit_path + "Conv2d_1x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Conv2d_1x1_biases.npy", weights_layout), + PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "Conv2d_1x1/convolution") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 0.10f, 0.f)) + .set_name(unit_name + "mul"); graph << EltwiseLayer(std::move(i_l), std::move(i_r), EltwiseOperation::Add).set_name(unit_name + "add") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Relu"); + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Relu"); } } - void block8_repeat(const std::string &data_path, DataLayout weights_layout, unsigned int num_blocks, float scale, bool has_activation) + void block8_repeat(const std::string &data_path, + DataLayout weights_layout, + unsigned int num_blocks, + float scale, + bool has_activation) { - for(unsigned int i = 0; i < num_blocks; ++i) + for (unsigned int i = 0; i < num_blocks; ++i) { std::stringstream unit_path_ss; std::stringstream unit_name_ss; - if(num_blocks != 1) + if (num_blocks != 1) { unit_path_ss << "Repeat_2_block8_" << (i + 1) << "_"; unit_name_ss << "Repeat_2/block8_" << (i + 1) << "/"; @@ -705,79 +801,97 @@ private: // Branch 0 SubStream i_la(i_l); - i_la << ConvolutionLayer(1U, 1U, 192U, - get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Branch_0/Conv2d_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + 
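block8_repeat's reflowed signature also surfaces its two extra knobs, scale and has_activation: the guards later in this function scale the residual only when scale != 1.f and append the trailing ReLU only when requested, and the num_blocks != 1 check drops the "Repeat_2/" name prefix when a single unit is built. The call sites are outside this hunk, so the following is only an illustration of the intended pattern, a run of scaled units followed by one final unscaled, activation-free unit:

    // Hypothetical call pattern; the real arguments live outside this hunk.
    block8_repeat(data_path, weights_layout, 9, 0.2f, true);  // repeated, scaled, with ReLU
    block8_repeat(data_path, weights_layout, 1, 1.f, false);  // final unit: no scale, no ReLU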
"Branch_0_Conv2d_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name(unit_name + "Branch_0/Conv2d_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_0/Conv2d_1x1/Relu"); + i_la << ConvolutionLayer( + 1U, 1U, 192U, + get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "Branch_0/Conv2d_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_0_Conv2d_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_0_Conv2d_1x1_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name(unit_name + "Branch_0/Conv2d_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_0/Conv2d_1x1/Relu"); // Branch 1 SubStream i_lb(i_l); i_lb << ConvolutionLayer(1U, 1U, 192U, - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_1/Conv2d_0a_1x1/Relu") + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_1/Conv2d_0a_1x1/Relu") << ConvolutionLayer(3U, 1U, 224U, - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x3_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x3_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 0)) - .set_name(unit_name + "Branch_1/Conv2d_0b_1x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_variance.npy"), - 
get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name(unit_name + "Branch_1/Conv2d_0b_1x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_1/Conv2d_0b_1x3/Relu") + .set_name(unit_name + "Branch_1/Conv2d_0b_1x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name(unit_name + "Branch_1/Conv2d_0b_1x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_1/Conv2d_0b_1x3/Relu") << ConvolutionLayer(1U, 3U, 256U, - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_3x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_3x1_weights.npy", + weights_layout), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 1)) - .set_name(unit_name + "Branch_1/Conv2d_0c_3x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name(unit_name + "Branch_1/Conv2d_0c_3x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Branch_1/Conv2d_0c_3x1/Relu"); + .set_name(unit_name + "Branch_1/Conv2d_0c_3x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + unit_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, unit_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name(unit_name + "Branch_1/Conv2d_0c_3x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Branch_1/Conv2d_0c_3x1/Relu"); // Concatenate i_l << ConcatLayer(std::move(i_la), std::move(i_lb)).set_name(unit_name + "concat") - << ConvolutionLayer(1U, 1U, 2080U, - get_weights_accessor(data_path, unit_path + "Conv2d_1x1_weights.npy", weights_layout), - get_weights_accessor(data_path, unit_path + "Conv2d_1x1_biases.npy", weights_layout), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "Conv2d_1x1/convolution"); + << ConvolutionLayer( + 1U, 1U, 2080U, + get_weights_accessor(data_path, unit_path + "Conv2d_1x1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "Conv2d_1x1_biases.npy", weights_layout), + PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "Conv2d_1x1/convolution"); // Scale result - if(scale != 1.f) + if (scale != 1.f) { - i_l << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, scale, 0.f)).set_name(unit_name + "mul"); + i_l << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, scale, 0.f)) + .set_name(unit_name + "mul"); } // Residual add graph << EltwiseLayer(std::move(i_l), std::move(i_r), EltwiseOperation::Add).set_name(unit_name + "add"); // Apply activation if needed - if(has_activation) + if (has_activation) { - graph << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Relu"); + graph << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Relu"); } } } diff --git a/examples/graph_inception_v3.cpp b/examples/graph_inception_v3.cpp index 03d5dff9be..75e03fb6b3 100644 --- a/examples/graph_inception_v3.cpp +++ b/examples/graph_inception_v3.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,7 +21,10 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ +#include "arm_compute/core/Types.h" +#include "arm_compute/core/Utils.h" #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -35,8 +38,7 @@ using namespace arm_compute::graph_utils; class InceptionV3Example : public Example { public: - InceptionV3Example() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "InceptionV3") + InceptionV3Example() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "InceptionV3") { } bool do_setup(int argc, char **argv) override @@ -49,7 +51,7 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; @@ -62,147 +64,177 @@ public: std::string data_path = common_params.data_path; // Create a preprocessor object - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<TFPreproccessor>(); + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<TFPreproccessor>(); // Create input descriptor const auto operation_layout = common_params.data_layout; - const TensorShape tensor_shape = permute_shape(TensorShape(299U, 299U, 3U, 1U), DataLayout::NCHW, operation_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(299U, 299U, 3U, common_params.batches), DataLayout::NCHW, operation_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NCHW; - graph << common_params.target - << common_params.fast_math_hint - << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), false)) - << ConvolutionLayer(3U, 3U, 32U, - get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) - .set_name("Conv2d_1a_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, - "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, - "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - nullptr, get_weights_accessor(data_path, 
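Beyond reformatting, the graph_inception_v3.cpp hunks above carry real substance: arm_compute::support::cpp14::make_unique gives way to std::make_unique (the library now assumes a C++14-or-later toolchain), and the input shape picks up the command-line batch size, TensorShape(299U, 299U, 3U, common_params.batches), instead of a hard-coded batch of 1, with permute_shape converting that NCHW-ordered shape to whatever layout was requested. The TFPreproccessor (the misspelling is the class's actual name in utils/GraphUtils.h) rescales inputs to the range TF-slim checkpoints expect; with its default [-1, 1] range that is, per element, roughly:

    #include <cstdint>

    // Sketch of the default TFPreproccessor mapping: [0, 255] -> [-1, 1].
    float tf_preprocess(uint8_t v)
    {
        return (v / 255.0f) * 2.0f - 1.0f;
    }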
- "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name("Conv2d_1a_3x3/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_1a_3x3/Relu") - << ConvolutionLayer(3U, 3U, 32U, - get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name("Conv2d_2a_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, - "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, - "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_BatchNorm_moving_variance.npy"), - nullptr, get_weights_accessor(data_path, - "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name("Conv2d_2a_3x3/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_2a_3x3/Relu") - - << ConvolutionLayer(3U, 3U, 64U, - get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) - .set_name("Conv2d_2b_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, - "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, - "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_BatchNorm_moving_variance.npy"), - nullptr, get_weights_accessor(data_path, - "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name("Conv2d_2b_3x3/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_2b_3x3/Relu") - - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("MaxPool_3a_3x3/MaxPool") - - << ConvolutionLayer(1U, 1U, 80U, - get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_3b_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name("Conv2d_3b_1x1/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, - "/cnn_data/inceptionv3_model/Conv2d_3b_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, - "/cnn_data/inceptionv3_model/Conv2d_3b_1x1_BatchNorm_moving_variance.npy"), - nullptr, get_weights_accessor(data_path, - "/cnn_data/inceptionv3_model/Conv2d_3b_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name("Conv2d_3b_1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_3b_1x1/Relu") - - << ConvolutionLayer(3U, 3U, 192U, - get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name("Conv2d_4a_3x3/convolution") - << BatchNormalizationLayer(get_weights_accessor(data_path, - "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, - "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_BatchNorm_moving_variance.npy"), - nullptr, get_weights_accessor(data_path, - "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name("Conv2d_4a_3x3/BatchNorm/batchnorm") - << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_4a_3x3/Relu") - - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("MaxPool_5a_3x3/MaxPool"); - - graph << get_inception_node_A(data_path, "Mixed_5b", weights_layout, 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U), - 32U) - .set_name("Mixed_5b/concat"); - graph << get_inception_node_A(data_path, "Mixed_5c", weights_layout, 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U), - 64U, true) - .set_name("Mixed_5c/concat"); - graph << get_inception_node_A(data_path, "Mixed_5d", weights_layout, 64U, std::make_tuple(48U, 64U), std::make_tuple(64U, 96U, 96U), - 64U) - .set_name("Mixed_5d/concat"); - - graph << get_inception_node_B(data_path, "Mixed_6a", weights_layout, 384U, std::make_tuple(64U, 96U, 96U)).set_name("Mixed_6a/concat"); + graph + << common_params.target << common_params.fast_math_hint + << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), false)) + << ConvolutionLayer(3U, 3U, 32U, + get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_weights.npy", + weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(2, 2, 0, 0)) + .set_name("Conv2d_1a_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, + "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + nullptr, + get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_1a_3x3_BatchNorm_beta.npy"), + 0.001f) + .set_name("Conv2d_1a_3x3/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_1a_3x3/Relu") + << ConvolutionLayer(3U, 3U, 32U, + get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_weights.npy", + weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(1, 1, 0, 0)) + .set_name("Conv2d_2a_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, + "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_BatchNorm_moving_variance.npy"), + nullptr, + get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2a_3x3_BatchNorm_beta.npy"), + 0.001f) + .set_name("Conv2d_2a_3x3/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_2a_3x3/Relu") + + << ConvolutionLayer(3U, 3U, 64U, + get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_weights.npy", + weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(1, 1, 1, 1)) + .set_name("Conv2d_2b_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, + "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_BatchNorm_moving_variance.npy"), + nullptr, + get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_2b_3x3_BatchNorm_beta.npy"), + 0.001f) + .set_name("Conv2d_2b_3x3/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) 
+ .set_name("Conv2d_2b_3x3/Relu") + + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + .set_name("MaxPool_3a_3x3/MaxPool") + + << ConvolutionLayer(1U, 1U, 80U, + get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_3b_1x1_weights.npy", + weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(1, 1, 0, 0)) + .set_name("Conv2d_3b_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, + "/cnn_data/inceptionv3_model/Conv2d_3b_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + "/cnn_data/inceptionv3_model/Conv2d_3b_1x1_BatchNorm_moving_variance.npy"), + nullptr, + get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_3b_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name("Conv2d_3b_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_3b_1x1/Relu") + + << ConvolutionLayer(3U, 3U, 192U, + get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_weights.npy", + weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(1, 1, 0, 0)) + .set_name("Conv2d_4a_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, + "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_BatchNorm_moving_variance.npy"), + nullptr, + get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Conv2d_4a_3x3_BatchNorm_beta.npy"), + 0.001f) + .set_name("Conv2d_4a_3x3/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_4a_3x3/Relu") + + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + .set_name("MaxPool_5a_3x3/MaxPool"); + + graph << get_inception_node_A(data_path, "Mixed_5b", weights_layout, 64U, std::make_tuple(48U, 64U), + std::make_tuple(64U, 96U, 96U), 32U) + .set_name("Mixed_5b/concat"); + graph << get_inception_node_A(data_path, "Mixed_5c", weights_layout, 64U, std::make_tuple(48U, 64U), + std::make_tuple(64U, 96U, 96U), 64U, true) + .set_name("Mixed_5c/concat"); + graph << get_inception_node_A(data_path, "Mixed_5d", weights_layout, 64U, std::make_tuple(48U, 64U), + std::make_tuple(64U, 96U, 96U), 64U) + .set_name("Mixed_5d/concat"); + + graph << get_inception_node_B(data_path, "Mixed_6a", weights_layout, 384U, std::make_tuple(64U, 96U, 96U)) + .set_name("Mixed_6a/concat"); graph << get_inception_node_C(data_path, "Mixed_6b", weights_layout, 192U, std::make_tuple(128U, 128U, 192U), std::make_tuple(128U, 128U, 128U, 128U, 192U), 192U) - .set_name("Mixed_6b/concat"); + .set_name("Mixed_6b/concat"); graph << get_inception_node_C(data_path, "Mixed_6c", weights_layout, 192U, std::make_tuple(160U, 160U, 192U), std::make_tuple(160U, 160U, 160U, 160U, 192U), 192U) - .set_name("Mixed_6c/concat"); + .set_name("Mixed_6c/concat"); graph << get_inception_node_C(data_path, "Mixed_6d", weights_layout, 192U, std::make_tuple(160U, 160U, 192U), std::make_tuple(160U, 160U, 160U, 160U, 192U), 192U) - .set_name("Mixed_6d/concat"); + .set_name("Mixed_6d/concat"); graph << get_inception_node_C(data_path, "Mixed_6e", weights_layout, 192U, std::make_tuple(192U, 192U, 192U), std::make_tuple(192U, 192U, 192U, 192U, 192U), 192U) - 
.set_name("Mixed_6e/concat"); + .set_name("Mixed_6e/concat"); graph << get_inception_node_D(data_path, "Mixed_7a", weights_layout, std::make_tuple(192U, 320U), std::make_tuple(192U, 192U, 192U, 192U)) - .set_name("Mixed_7a/concat"); + .set_name("Mixed_7a/concat"); graph << get_inception_node_E(data_path, "Mixed_7b", weights_layout, 320U, std::make_tuple(384U, 384U, 384U), std::make_tuple(448U, 384U, 384U, 384U), 192U) - .set_name("Mixed_7b/concat"); + .set_name("Mixed_7b/concat"); graph << get_inception_node_E(data_path, "Mixed_7c", weights_layout, 320U, std::make_tuple(384U, 384U, 384U), std::make_tuple(448U, 384U, 384U, 384U), 192U, true) - .set_name("Mixed_7c/concat"); - - graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 8, operation_layout, PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL))).set_name("Logits/AvgPool_1a_8x8/AvgPool") - << ConvolutionLayer(1U, 1U, 1001U, get_weights_accessor(data_path, - "/cnn_data/inceptionv3_model/Logits_Conv2d_1c_1x1_weights.npy", weights_layout), - get_weights_accessor(data_path, - "/cnn_data/inceptionv3_model/Logits_Conv2d_1c_1x1_biases.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Logits/Conv2d_1c_1x1/convolution") + .set_name("Mixed_7c/concat"); + + graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 8, operation_layout, + PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::CEIL))) + .set_name("Logits/AvgPool_1a_8x8/AvgPool") + << ConvolutionLayer( + 1U, 1U, 1001U, + get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Logits_Conv2d_1c_1x1_weights.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/inceptionv3_model/Logits_Conv2d_1c_1x1_biases.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("Logits/Conv2d_1c_1x1/convolution") << ReshapeLayer(TensorShape(1001U)).set_name("Predictions/Reshape") - << SoftmaxLayer().set_name("Predictions/Softmax") - << OutputLayer(get_output_accessor(common_params, 5)); + << SoftmaxLayer().set_name("Predictions/Softmax") << OutputLayer(get_output_accessor(common_params, 5)); // Finalize graph GraphConfig config; - config.num_threads = common_params.threads; - config.use_tuner = common_params.enable_tuner; - config.tuner_mode = common_params.tuner_mode; - config.tuner_file = common_params.tuner_file; - config.convert_to_uint8 = (common_params.data_type == DataType::QASYMM8); - + config.num_threads = common_params.threads; + config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; + config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; + config.use_synthetic_type = arm_compute::is_data_type_quantized(common_params.data_type); + config.synthetic_type = common_params.data_type; graph.finalize(common_params.target, config); return true; @@ -220,19 +252,21 @@ private: Stream graph; private: - ConcatLayer get_inception_node_A(const std::string &data_path, std::string &¶m_path, DataLayout weights_layout, - unsigned int a_filt, - std::tuple<unsigned int, unsigned int> b_filters, + ConcatLayer get_inception_node_A(const std::string &data_path, + std::string &¶m_path, + DataLayout weights_layout, + unsigned int a_filt, + std::tuple<unsigned int, unsigned int> b_filters, std::tuple<unsigned int, unsigned int, unsigned int> c_filters, - unsigned int d_filt, - bool is_name_different = false) + unsigned int d_filt, + bool is_name_different = false) { std::string total_path = "/cnn_data/inceptionv3_model/" + param_path + "_"; // This is due to a naming issue in the tf model std::string conv_id0 = "_0a_"; std::string 
conv_id1 = "2d_0b_"; - if(is_name_different) + if (is_name_different) { conv_id0 = "_0b_"; conv_id1 = "_1_0c_"; @@ -240,457 +274,451 @@ private: SubStream i_a(graph); i_a << ConvolutionLayer( - 1U, 1U, a_filt, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu"); + 1U, 1U, a_filt, + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu"); SubStream i_b(graph); i_b << ConvolutionLayer( - 1U, 1U, std::get<0>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id0 + "1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_1/Conv2d" + conv_id0 + "1x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id0 + "1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id0 + "1x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id0 + "1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d" + conv_id0 + "1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d" + conv_id0 + "1x1/Relu") + 1U, 1U, std::get<0>(b_filters), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id0 + "1x1_weights.npy", + weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_1/Conv2d" + conv_id0 + "1x1/convolution") + << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id0 + + "1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id0 + + "1x1_BatchNorm_moving_variance.npy"), + nullptr, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id0 + + 
"1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d" + conv_id0 + "1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d" + conv_id0 + "1x1/Relu") << ConvolutionLayer( - 5U, 5U, std::get<1>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv" + conv_id1 + "5x5_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 2, 2)) - .set_name(param_path + "/Branch_1/Conv2d" + conv_id1 + "5x5/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv" + conv_id1 + "5x5_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv" + conv_id1 + "5x5_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_1_Conv" + conv_id1 + "5x5_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d" + conv_id1 + "5x5/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d" + conv_id1 + "5x5/Relu"); + 5U, 5U, std::get<1>(b_filters), + get_weights_accessor(data_path, total_path + "Branch_1_Conv" + conv_id1 + "5x5_weights.npy", + weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 2, 2)) + .set_name(param_path + "/Branch_1/Conv2d" + conv_id1 + "5x5/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, + total_path + "Branch_1_Conv" + conv_id1 + "5x5_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + total_path + "Branch_1_Conv" + conv_id1 + "5x5_BatchNorm_moving_variance.npy"), + nullptr, + get_weights_accessor(data_path, total_path + "Branch_1_Conv" + conv_id1 + "5x5_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d" + conv_id1 + "5x5/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d" + conv_id1 + "5x5/Relu"); SubStream i_c(graph); i_c << ConvolutionLayer( - 1U, 1U, std::get<0>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu") + 1U, 1U, std::get<0>(c_filters), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + 
"Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu") << ConvolutionLayer( - 3U, 3U, std::get<1>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0b_3x3/Relu") + 3U, 3U, std::get<1>(c_filters), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/Relu") << ConvolutionLayer( - 3U, 3U, std::get<2>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name(param_path + "/Branch_2/Conv2d_0c_3x3/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0c_3x3/BatchNorm/batcnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0c_3x3/Relu"); + 3U, 3U, std::get<2>(c_filters), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name(param_path + "/Branch_2/Conv2d_0c_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_variance.npy"), + nullptr, 
get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0c_3x3/BatchNorm/batcnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0c_3x3/Relu"); SubStream i_d(graph); - i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, common_params.data_layout, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), - true)) - .set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool") + i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, common_params.data_layout, + PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)) + .set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool") << ConvolutionLayer( - 1U, 1U, d_filt, - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Relu"); + 1U, 1U, d_filt, + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Relu"); return ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)); } - ConcatLayer get_inception_node_B(const std::string &data_path, std::string &¶m_path, DataLayout weights_layout, - unsigned int a_filt, + ConcatLayer get_inception_node_B(const std::string &data_path, + std::string &¶m_path, + DataLayout weights_layout, + unsigned int a_filt, std::tuple<unsigned int, unsigned int, unsigned int> b_filters) { std::string total_path = "/cnn_data/inceptionv3_model/" + param_path + "_"; SubStream i_a(graph); i_a << ConvolutionLayer( - 3U, 3U, a_filt, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 0)) - .set_name(param_path + "/Branch_0/Conv2d_1a_1x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_1x1_BatchNorm_moving_variance.npy"), - nullptr, - 
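// ---------------------------------------------------------------------------
// Editor's note (not part of the upstream commit): every branch in these
// inception node builders repeats the same Conv2D -> BatchNorm -> ReLU
// triplet, differing only in kernel size, padding and the .npy name stem.
// A minimal sketch of a helper that factors the triplet out is shown below;
// the helper name add_conv_bn_relu, the Stream template parameter and the
// caller-built name strings are editorial assumptions, not ACL API.
template <typename Stream>
void add_conv_bn_relu(Stream            &stream,
                      const std::string &data_path,
                      const std::string &total_path,   // e.g. "/cnn_data/inceptionv3_model/<param_path>_"
                      const std::string &unit,         // e.g. "Branch_0_Conv2d_0a_1x1"
                      const std::string &name,         // e.g. "<param_path>/Branch_0/Conv2d_0a_1x1"
                      unsigned int       width,
                      unsigned int       height,
                      unsigned int       ofm,
                      PadStrideInfo      pad_stride,
                      DataLayout         weights_layout)
{
    stream << ConvolutionLayer(width, height, ofm,
                               get_weights_accessor(data_path, total_path + unit + "_weights.npy", weights_layout),
                               std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), // no bias
                               pad_stride)
                  .set_name(name + "/convolution")
           << BatchNormalizationLayer(
                  get_weights_accessor(data_path, total_path + unit + "_BatchNorm_moving_mean.npy"),
                  get_weights_accessor(data_path, total_path + unit + "_BatchNorm_moving_variance.npy"),
                  nullptr, // no gamma: these checkpoints store beta only
                  get_weights_accessor(data_path, total_path + unit + "_BatchNorm_beta.npy"),
                  0.001f)  // epsilon used throughout this example
                  .set_name(name + "/BatchNorm/batchnorm")
           << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
                  .set_name(name + "/Relu");
}
// ---------------------------------------------------------------------------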
get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_0/Conv2d_1a_1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_1a_1x1/Relu"); + 3U, 3U, a_filt, + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name(param_path + "/Branch_0/Conv2d_1a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_1x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_0/Conv2d_1a_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_0/Conv2d_1a_1x1/Relu"); SubStream i_b(graph); i_b << ConvolutionLayer( - 1U, 1U, std::get<0>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu") + 1U, 1U, std::get<0>(b_filters), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu") << ConvolutionLayer( - 3U, 3U, std::get<1>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name(param_path + "/Branch_1/Conv2d_0b_3x3/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + 
"Branch_1_Conv2d_0b_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0b_3x3/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0b_3x3/Relu") + 3U, 3U, std::get<1>(b_filters), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name(param_path + "/Branch_1/Conv2d_0b_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0b_3x3/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_0b_3x3/Relu") << ConvolutionLayer( - 3U, 3U, std::get<2>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 0)) - .set_name(param_path + "/Branch_1/Conv2d_1a_1x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_1a_1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_1a_1x1/Relu"); + 3U, 3U, std::get<2>(b_filters), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name(param_path + "/Branch_1/Conv2d_1a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_1a_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_1a_1x1/Relu"); SubStream i_c(graph); - i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name(param_path + "/Branch_2/MaxPool_1a_3x3/MaxPool"); + i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + .set_name(param_path + "/Branch_2/MaxPool_1a_3x3/MaxPool"); return ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c)); } - ConcatLayer get_inception_node_C(const std::string &data_path, std::string &¶m_path, DataLayout weights_layout, - unsigned int a_filt, - std::tuple<unsigned int, unsigned int, unsigned 
int> b_filters, - std::tuple<unsigned int, unsigned int, unsigned int, unsigned int, unsigned int> c_filters, - unsigned int d_filt) + ConcatLayer + get_inception_node_C(const std::string &data_path, + std::string &¶m_path, + DataLayout weights_layout, + unsigned int a_filt, + std::tuple<unsigned int, unsigned int, unsigned int> b_filters, + std::tuple<unsigned int, unsigned int, unsigned int, unsigned int, unsigned int> c_filters, + unsigned int d_filt) { std::string total_path = "/cnn_data/inceptionv3_model/" + param_path + "_"; SubStream i_a(graph); i_a << ConvolutionLayer( - 1U, 1U, a_filt, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu"); + 1U, 1U, a_filt, + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu"); SubStream i_b(graph); i_b << ConvolutionLayer( - 1U, 1U, std::get<0>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu") + 1U, 1U, std::get<0>(b_filters), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + 
"Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu") << ConvolutionLayer( - 7U, 1U, std::get<1>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 3, 0)) - .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0b_1x7/Relu") + 7U, 1U, std::get<1>(b_filters), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0)) + .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/Relu") << ConvolutionLayer( - 1U, 7U, std::get<2>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 3)) - .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_0c_7x1/Relu"); + 1U, 7U, std::get<2>(b_filters), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3)) + .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"), + 
get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_0/Conv2d_0c_7x1/Relu"); SubStream i_c(graph); i_c << ConvolutionLayer( - 1U, 1U, std::get<0>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu") + 1U, 1U, std::get<0>(c_filters), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu") << ConvolutionLayer( - 1U, 7U, std::get<1>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 3)) - .set_name(param_path + "/Branch_2/Conv2d_0b_7x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0b_7x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0b_7x1/Relu") + 1U, 7U, std::get<1>(c_filters), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3)) + .set_name(param_path + "/Branch_2/Conv2d_0b_7x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + 
"Branch_2_Conv2d_0b_7x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0b_7x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0b_7x1/Relu") << ConvolutionLayer( - 7U, 1U, std::get<2>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 3, 0)) - .set_name(param_path + "/Branch_2/Conv2d_0c_1x7/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0c_1x7/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0c_1x7/Relu") + 7U, 1U, std::get<2>(c_filters), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0)) + .set_name(param_path + "/Branch_2/Conv2d_0c_1x7/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0c_1x7/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0c_1x7/Relu") << ConvolutionLayer( - 1U, 7U, std::get<3>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 3)) - .set_name(param_path + "/Branch_2/Conv2d_0d_7x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0d_7x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0d_7x1/Relu") + 1U, 7U, std::get<3>(c_filters), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3)) + .set_name(param_path + "/Branch_2/Conv2d_0d_7x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_moving_variance.npy"), + nullptr, 
get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0d_7x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0d_7x1/Relu") << ConvolutionLayer( - 7U, 1U, std::get<4>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 3, 0)) - .set_name(param_path + "/Branch_2/Conv2d_0e_1x7/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0e_1x7/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0e_1x7/Relu"); + 7U, 1U, std::get<4>(c_filters), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0)) + .set_name(param_path + "/Branch_2/Conv2d_0e_1x7/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0e_1x7/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0e_1x7/Relu"); SubStream i_d(graph); - i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, common_params.data_layout, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), - true)) - .set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool") + i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, common_params.data_layout, + PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)) + .set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool") << ConvolutionLayer( - 1U, 1U, d_filt, - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Relu"); + 1U, 1U, d_filt, + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), 
PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Relu"); return ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)); } - ConcatLayer get_inception_node_D(const std::string &data_path, std::string &¶m_path, DataLayout weights_layout, - std::tuple<unsigned int, unsigned int> a_filters, + ConcatLayer get_inception_node_D(const std::string &data_path, + std::string &¶m_path, + DataLayout weights_layout, + std::tuple<unsigned int, unsigned int> a_filters, std::tuple<unsigned int, unsigned int, unsigned int, unsigned int> b_filters) { std::string total_path = "/cnn_data/inceptionv3_model/" + param_path + "_"; SubStream i_a(graph); i_a << ConvolutionLayer( - 1U, 1U, std::get<0>(a_filters), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu") + 1U, 1U, std::get<0>(a_filters), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu") << ConvolutionLayer( - 3U, 3U, std::get<1>(a_filters), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 0)) - .set_name(param_path + "/Branch_0/Conv2d_1a_3x3/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - nullptr, 
- get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_0/Conv2d_1a_3x3/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_1a_3x3/Relu"); + 3U, 3U, std::get<1>(a_filters), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name(param_path + "/Branch_0/Conv2d_1a_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_0/Conv2d_1a_3x3/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_0/Conv2d_1a_3x3/Relu"); SubStream i_b(graph); i_b << ConvolutionLayer( - 1U, 1U, std::get<0>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu") + 1U, 1U, std::get<0>(b_filters), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu") << ConvolutionLayer( - 7U, 1U, std::get<1>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 3, 0)) - .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + 
"Branch_1_Conv2d_0b_1x7_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0b_1x7/Relu") + 7U, 1U, std::get<1>(b_filters), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0)) + .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/Relu") << ConvolutionLayer( - 1U, 7U, std::get<2>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 3)) - .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0c_7x1/Relu") + 1U, 7U, std::get<2>(b_filters), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3)) + .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/Relu") << ConvolutionLayer( - 3U, 3U, std::get<3>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 0)) - .set_name(param_path + "/Branch_1/Conv2d_1a_3x3/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + 
"/Branch_1/Conv2d_1a_3x3/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_1a_3x3/Relu"); + 3U, 3U, std::get<3>(b_filters), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name(param_path + "/Branch_1/Conv2d_1a_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_1a_3x3/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_1a_3x3/Relu"); SubStream i_c(graph); - i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name(param_path + "/Branch_2/MaxPool_1a_3x3/MaxPool"); + i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + .set_name(param_path + "/Branch_2/MaxPool_1a_3x3/MaxPool"); return ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c)); } - ConcatLayer get_inception_node_E(const std::string &data_path, std::string &¶m_path, DataLayout weights_layout, - unsigned int a_filt, - std::tuple<unsigned int, unsigned int, unsigned int> b_filters, + ConcatLayer get_inception_node_E(const std::string &data_path, + std::string &¶m_path, + DataLayout weights_layout, + unsigned int a_filt, + std::tuple<unsigned int, unsigned int, unsigned int> b_filters, std::tuple<unsigned int, unsigned int, unsigned int, unsigned int> c_filters, - unsigned int d_filt, - bool is_name_different = false) + unsigned int d_filt, + bool is_name_different = false) { // This is due to a naming issue in the tf model std::string conv_id = "_0b_"; - if(is_name_different) + if (is_name_different) { conv_id = "_0c_"; } @@ -698,154 +726,152 @@ private: std::string total_path = "/cnn_data/inceptionv3_model/" + param_path + "_"; SubStream i_a(graph); i_a << ConvolutionLayer( - 1U, 1U, a_filt, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu"); + 1U, 1U, a_filt, + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + 
"/Branch_0/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu"); SubStream i_b(graph); i_b << ConvolutionLayer( - 1U, 1U, std::get<0>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu"); + 1U, 1U, std::get<0>(b_filters), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu"); SubStream i_b1(i_b); i_b1 << ConvolutionLayer( - 3U, 1U, std::get<1>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 0)) - .set_name(param_path + "/Branch_1/Conv2d_0b_1x3/convolution") + 3U, 1U, std::get<1>(b_filters), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 0)) + .set_name(param_path + "/Branch_1/Conv2d_0b_1x3/convolution") << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0b_1x3/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + 
"/Branch_1/Conv2d_0b_1x3/Relu"); + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0b_1x3/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_0b_1x3/Relu"); SubStream i_b2(i_b); i_b2 << ConvolutionLayer( - 1U, 3U, std::get<2>(b_filters), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id + "3x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 1)) - .set_name(param_path + "/Branch_1/Conv2d" + conv_id + "3x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id + "3x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id + "3x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id + "3x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d" + conv_id + "3x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d" + conv_id + "3x1/Relu"); + 1U, 3U, std::get<2>(b_filters), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id + "3x1_weights.npy", + weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 1)) + .set_name(param_path + "/Branch_1/Conv2d" + conv_id + "3x1/convolution") + << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id + + "3x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id + + "3x1_BatchNorm_moving_variance.npy"), + nullptr, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d" + conv_id + + "3x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_1/Conv2d" + conv_id + "3x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d" + conv_id + "3x1/Relu"); // Merge b1 and b2 i_b << ConcatLayer(std::move(i_b1), std::move(i_b2)).set_name(param_path + "/Branch_1/concat"); SubStream i_c(graph); i_c << ConvolutionLayer( - 1U, 1U, std::get<0>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu") + 1U, 1U, std::get<0>(c_filters), + 
get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu") << ConvolutionLayer( - 3U, 3U, std::get<1>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0b_3x3/Relu"); + 3U, 3U, std::get<1>(c_filters), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/Relu"); SubStream i_c1(i_c); i_c1 << ConvolutionLayer( - 3U, 1U, std::get<2>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 0)) - .set_name(param_path + "/Branch_2/Conv2d_0c_1x3/convolution") + 3U, 1U, std::get<2>(c_filters), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 0)) + .set_name(param_path + "/Branch_2/Conv2d_0c_1x3/convolution") << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_BatchNorm_beta.npy"), - 0.001f) - 
.set_name(param_path + "/Branch_2/Conv2d_0c_1x3/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0c_1x3/Relu"); + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + total_path + "Branch_2_Conv2d_0c_1x3_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0c_1x3/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0c_1x3/Relu"); SubStream i_c2(i_c); i_c2 << ConvolutionLayer( - 1U, 3U, std::get<3>(c_filters), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_3x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 1)) - .set_name(param_path + "/Branch_2/Conv2d_0d_3x1/convolution") + 1U, 3U, std::get<3>(c_filters), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_3x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 1)) + .set_name(param_path + "/Branch_2/Conv2d_0d_3x1/convolution") << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_3x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_3x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_3x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0d_3x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0d_3x1/Relu"); + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_3x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + total_path + "Branch_2_Conv2d_0d_3x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_3x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0d_3x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0d_3x1/Relu"); // Merge i_c1 and i_c2 i_c << ConcatLayer(std::move(i_c1), std::move(i_c2)).set_name(param_path + "/Branch_2/concat"); SubStream i_d(graph); - i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, common_params.data_layout, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), - true)) - .set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool") + i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, common_params.data_layout, + PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)) + .set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool") << ConvolutionLayer( - 1U, 1U, d_filt, - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + 
"Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), - nullptr, - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/BatchNorm/batchnorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Relu"); + 1U, 1U, d_filt, + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), + nullptr, get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"), + 0.001f) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/BatchNorm/batchnorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Relu"); return ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)); } diff --git a/examples/graph_inception_v4.cpp b/examples/graph_inception_v4.cpp index c78bbb285a..052498ad38 100644 --- a/examples/graph_inception_v4.cpp +++ b/examples/graph_inception_v4.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -39,8 +39,7 @@ using namespace arm_compute::graph_utils; class InceptionV4Example final : public Example { public: - InceptionV4Example() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "InceptionV4") + InceptionV4Example() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "InceptionV4") { } bool do_setup(int argc, char **argv) override @@ -53,7 +52,7 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; @@ -66,55 +65,74 @@ public: std::string data_path = common_params.data_path; // Create a preprocessor object - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<TFPreproccessor>(); + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<TFPreproccessor>(); // Create input descriptor const auto operation_layout = common_params.data_layout; - const TensorShape tensor_shape = permute_shape(TensorShape(299U, 299U, 3U, 1U), DataLayout::NCHW, operation_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(299U, 299U, 3U, common_params.batches), DataLayout::NCHW, operation_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NCHW; - graph << common_params.target - << common_params.fast_math_hint + graph << common_params.target << common_params.fast_math_hint << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), false)) // Conv2d_1a_3x3 - << ConvolutionLayer(3U, 3U, 32U, - get_weights_accessor(data_path, 
"/cnn_data/inceptionv4_model/Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) - .set_name("Conv2d_1a_3x3/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name("Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_1a_3x3/Relu") + << ConvolutionLayer( + 3U, 3U, 32U, + get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_1a_3x3_weights.npy", + weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Conv2d_1a_3x3/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, + "/cnn_data/inceptionv4_model/Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + "/cnn_data/inceptionv4_model/Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_1a_3x3_BatchNorm_beta.npy"), + 0.001f) + .set_name("Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_1a_3x3/Relu") // Conv2d_2a_3x3 - << ConvolutionLayer(3U, 3U, 32U, - get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name("Conv2d_2a_3x3/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2a_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name("Conv2d_2a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_2a_3x3/Relu") + << ConvolutionLayer( + 3U, 3U, 32U, + get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2a_3x3_weights.npy", + weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Conv2d_2a_3x3/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, + "/cnn_data/inceptionv4_model/Conv2d_2a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + "/cnn_data/inceptionv4_model/Conv2d_2a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2a_3x3_BatchNorm_beta.npy"), + 0.001f) + .set_name("Conv2d_2a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_2a_3x3/Relu") // Conv2d_2b_3x3 - << ConvolutionLayer(3U, 3U, 64U, - get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2b_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) - .set_name("Conv2d_2b_3x3/Conv2D") - << 
BatchNormalizationLayer(get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2b_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2b_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name("Conv2d_2b_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv2d_2b_3x3/Relu"); + << ConvolutionLayer( + 3U, 3U, 64U, + get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2b_3x3_weights.npy", + weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("Conv2d_2b_3x3/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, + "/cnn_data/inceptionv4_model/Conv2d_2b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + "/cnn_data/inceptionv4_model/Conv2d_2b_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Conv2d_2b_3x3_BatchNorm_beta.npy"), + 0.001f) + .set_name("Conv2d_2b_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv2d_2b_3x3/Relu"); graph << get_mixed_3a(data_path, weights_layout).set_name("Mixed_3a/concat"); graph << get_mixed_4a(data_path, weights_layout).set_name("Mixed_4a/concat"); @@ -140,27 +158,30 @@ public: graph << get_inceptionC_block(data_path, weights_layout, "Mixed_7b").set_name("Mixed_7b/concat"); graph << get_inceptionC_block(data_path, weights_layout, "Mixed_7c").set_name("Mixed_7c/concat"); graph << get_inceptionC_block(data_path, weights_layout, "Mixed_7d").set_name("Mixed_7d/concat"); - graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, operation_layout)).set_name("Logits/AvgPool_1a/AvgPool") - << FlattenLayer().set_name("Logits/Flatten") - << FullyConnectedLayer( - 1001U, - get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Logits_Logits_weights.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Logits_Logits_biases.npy")) - .set_name("Logits/MatMul") - << SoftmaxLayer().set_name("Logits/Predictions") - << OutputLayer(get_output_accessor(common_params, 5)); + graph + << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, operation_layout)).set_name("Logits/AvgPool_1a/AvgPool") + << FlattenLayer().set_name("Logits/Flatten") + << FullyConnectedLayer( + 1001U, + get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Logits_Logits_weights.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/inceptionv4_model/Logits_Logits_biases.npy")) + .set_name("Logits/MatMul") + << SoftmaxLayer().set_name("Logits/Predictions") << OutputLayer(get_output_accessor(common_params, 5)); // Finalize graph GraphConfig config; - config.num_threads = common_params.threads; - config.use_tuner = common_params.enable_tuner; - config.tuner_mode = common_params.tuner_mode; - config.tuner_file = common_params.tuner_file; - config.convert_to_uint8 = (common_params.data_type == DataType::QASYMM8); + config.num_threads = common_params.threads; + config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; + config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; + config.use_synthetic_type = arm_compute::is_data_type_quantized(common_params.data_type); 
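The hunk around this point is where graph_inception_v4.cpp actually changes behaviour rather than just formatting: the input descriptor now honours common_params.batches instead of a hard-coded batch of 1, the QASYMM8-only convert_to_uint8 flag gives way to the generic use_synthetic_type/synthetic_type pair, and an MLGO heuristics file is forwarded to the runtime. A minimal consolidated sketch of the resulting finalization path follows; it assumes the examples' standard scaffolding ("arm_compute/graph.h", "utils/CommonGraphOptions.h"), and finalize_example_graph is a hypothetical helper for illustration, not something this patch adds:

    #include "arm_compute/graph.h"
    #include "utils/CommonGraphOptions.h"

    using arm_compute::graph::GraphConfig;
    using arm_compute::graph::frontend::Stream;
    using arm_compute::utils::CommonGraphParams;

    // Hypothetical consolidation of the GraphConfig block in this hunk.
    void finalize_example_graph(Stream &graph, const CommonGraphParams &common_params)
    {
        GraphConfig config;
        config.num_threads = common_params.threads;
        config.use_tuner   = common_params.enable_tuner; // OpenCL kernel tuner
        config.tuner_mode  = common_params.tuner_mode;
        config.tuner_file  = common_params.tuner_file;
        config.mlgo_file   = common_params.mlgo_file;    // new: MLGO heuristics file
        // Replaces convert_to_uint8: when a quantized data type is requested on
        // the command line, run the graph with that type as a synthetic type.
        config.use_synthetic_type = arm_compute::is_data_type_quantized(common_params.data_type);
        config.synthetic_type     = common_params.data_type;
        graph.finalize(common_params.target, config);
    }

Deriving use_synthetic_type from is_data_type_quantized presumably keeps F32/F16 runs on the real-weights path while letting any quantized run proceed without a dedicated set of quantized .npy files.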
+        config.synthetic_type     = common_params.data_type;

         // Load the precompiled kernels from a file into the kernel library, in this way the next time they are needed
         // compilation won't be required.
-        if(common_params.enable_cl_cache)
+        if (common_params.enable_cl_cache)
         {
 #ifdef ARM_COMPUTE_CL
             restore_program_cache_from_file();
@@ -170,7 +191,7 @@ public:
         graph.finalize(common_params.target, config);

         // Save the opencl kernels to a file
-        if(common_opts.enable_cl_cache)
+        if (common_opts.enable_cl_cache)
         {
 #ifdef ARM_COMPUTE_CL
             save_program_cache_to_file();
@@ -197,22 +218,24 @@ private:
         std::string total_path = "/cnn_data/inceptionv4_model/Mixed_3a_";

         SubStream i_a(graph);
-        i_a << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL),
-                                             true))
-            .set_name("Mixed_3a/Branch_0/MaxPool_0a_3x3/MaxPool");
+        i_a << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout,
+                                             PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true))
+                .set_name("Mixed_3a/Branch_0/MaxPool_0a_3x3/MaxPool");

         SubStream i_b(graph);
-        i_b << ConvolutionLayer(3U, 3U, 96U,
-                                get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_3x3_weights.npy", weights_layout),
-                                std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0))
-            .set_name("Mixed_3a/Branch_1/Conv2d_0a_3x3/Conv2D")
-            << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_3x3_BatchNorm_moving_mean.npy"),
-                                       get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_3x3_BatchNorm_moving_variance.npy"),
-                                       get_random_accessor(1.f, 1.f),
-                                       get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_3x3_BatchNorm_beta.npy"),
-                                       0.001f)
-            .set_name("Mixed_3a/Branch_1/Conv2d_0a_3x3/BatchNorm")
-            << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_3a/Branch_1/Conv2d_0a_3x3/Relu");
+        i_b << ConvolutionLayer(
+                   3U, 3U, 96U,
+                   get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_3x3_weights.npy", weights_layout),
+                   std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0))
+                   .set_name("Mixed_3a/Branch_1/Conv2d_0a_3x3/Conv2D")
+            << BatchNormalizationLayer(
+                   get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_3x3_BatchNorm_moving_mean.npy"),
+                   get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_3x3_BatchNorm_moving_variance.npy"),
+                   get_random_accessor(1.f, 1.f),
+                   get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_3x3_BatchNorm_beta.npy"), 0.001f)
+                   .set_name("Mixed_3a/Branch_1/Conv2d_0a_3x3/BatchNorm")
+            << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
+                   .set_name("Mixed_3a/Branch_1/Conv2d_0a_3x3/Relu");

         return ConcatLayer(std::move(i_a), std::move(i_b));
     }
@@ -222,74 +245,86 @@ private:
         std::string total_path = "/cnn_data/inceptionv4_model/Mixed_4a_";

         SubStream i_a(graph);
-        i_a << ConvolutionLayer(1U, 1U, 64U,
-                                get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout),
-                                std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
-            .set_name("Mixed_4a/Branch_0/Conv2d_0a_1x1/Conv2D")
-            << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"),
-                                       get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"),
-                                       get_random_accessor(1.f, 1.f),
-
get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name("Mixed_4a/Branch_0/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_4a/Branch_0/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(3U, 3U, 96U, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_4a/Branch_0/Conv2d_1a_3x3/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name("Mixed_4a/Branch_0/Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_4a/Branch_0/Conv2d_1a_3x3/Relu"); + i_a << ConvolutionLayer( + 1U, 1U, 64U, + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Mixed_4a/Branch_0/Conv2d_0a_1x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), 0.001f) + .set_name("Mixed_4a/Branch_0/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_4a/Branch_0/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 3U, 3U, 96U, + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Mixed_4a/Branch_0/Conv2d_1a_3x3/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), 0.001f) + .set_name("Mixed_4a/Branch_0/Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_4a/Branch_0/Conv2d_1a_3x3/Relu"); SubStream i_b(graph); - i_b << ConvolutionLayer(1U, 1U, 64U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_4a/Branch_1/Conv2d_0a_1x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - 
.set_name("Mixed_4a/Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_4a/Branch_1/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(7U, 1U, 64U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0)) - .set_name("Mixed_4a/Branch_1/Conv2d_0b_1x7/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_beta.npy"), - 0.001f) - .set_name("Mixed_4a/Branch_1/Conv2d_0b_1x7/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_4a/Branch_1/Conv2d_0b_1x7/Relu") - << ConvolutionLayer(1U, 7U, 64U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3)) - .set_name("Mixed_4a/Branch_1/Conv2d_0c_7x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_beta.npy"), - 0.001f) - .set_name("Mixed_4a/Branch_1/Conv2d_0c_7x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_4a/Branch_1/Conv2d_0c_7x1/Relu") - << ConvolutionLayer(3U, 3U, 96U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_4a/Branch_1/Conv2d_1a_3x3/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name("Mixed_4a/Branch_1/Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_4a/Branch_1/Conv2d_1a_3x3/Relu"); + i_b << ConvolutionLayer( + 1U, 1U, 64U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Mixed_4a/Branch_1/Conv2d_0a_1x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), 0.001f) + .set_name("Mixed_4a/Branch_1/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + 
.set_name("Mixed_4a/Branch_1/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 7U, 1U, 64U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0)) + .set_name("Mixed_4a/Branch_1/Conv2d_0b_1x7/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_beta.npy"), 0.001f) + .set_name("Mixed_4a/Branch_1/Conv2d_0b_1x7/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_4a/Branch_1/Conv2d_0b_1x7/Relu") + << ConvolutionLayer( + 1U, 7U, 64U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3)) + .set_name("Mixed_4a/Branch_1/Conv2d_0c_7x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_beta.npy"), 0.001f) + .set_name("Mixed_4a/Branch_1/Conv2d_0c_7x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_4a/Branch_1/Conv2d_0c_7x1/Relu") + << ConvolutionLayer( + 3U, 3U, 96U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Mixed_4a/Branch_1/Conv2d_1a_3x3/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"), 0.001f) + .set_name("Mixed_4a/Branch_1/Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_4a/Branch_1/Conv2d_1a_3x3/Relu"); return ConcatLayer(std::move(i_a), std::move(i_b)); } @@ -299,22 +334,24 @@ private: std::string total_path = "/cnn_data/inceptionv4_model/Mixed_5a_"; SubStream i_a(graph); - i_a << ConvolutionLayer(3U, 3U, 192U, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) - .set_name("Mixed_5a/Branch_0/Conv2d_1a_3x3/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name("Mixed_5a/Branch_0/Conv2d_1a_3x3/BatchNorm") - << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_5a/Branch_0/Conv2d_1a_3x3/Relu"); + i_a << ConvolutionLayer( + 3U, 3U, 192U, + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Mixed_5a/Branch_0/Conv2d_1a_3x3/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), 0.001f) + .set_name("Mixed_5a/Branch_0/Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_5a/Branch_0/Conv2d_1a_3x3/Relu"); SubStream i_b(graph); - i_b << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), - true)) - .set_name("Mixed_5a/Branch_1/MaxPool_1a_3x3/MaxPool"); + i_b << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true)) + .set_name("Mixed_5a/Branch_1/MaxPool_1a_3x3/MaxPool"); return ConcatLayer(std::move(i_a), std::move(i_b)); } @@ -324,92 +361,106 @@ private: std::string total_path = "/cnn_data/inceptionv4_model/" + param_path + "_"; SubStream i_a(graph); - i_a << ConvolutionLayer(1U, 1U, 96U, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu"); + i_a << ConvolutionLayer( + 1U, 1U, 96U, + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu"); SubStream i_b(graph); - i_b << ConvolutionLayer(1U, 1U, 64U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), - 
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(3U, 3U, 96U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) - .set_name(param_path + "/Branch_1/Conv2d_0b_3x3/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0b_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0b_3x3/Relu"); + i_b << ConvolutionLayer( + 1U, 1U, 64U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 3U, 3U, 96U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name(param_path + "/Branch_1/Conv2d_0b_3x3/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0b_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_0b_3x3/Relu"); SubStream i_c(graph); - i_c << ConvolutionLayer(1U, 1U, 64U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), 
PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(3U, 3U, 96U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) - .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0b_3x3/Relu") - << ConvolutionLayer(3U, 3U, 96U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) - .set_name(param_path + "/Branch_2/Conv2d_0c_3x3/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0c_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0c_3x3/Relu"); + i_c << ConvolutionLayer( + 1U, 1U, 64U, + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 3U, 3U, 96U, + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/Conv2D") + << 
BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x3_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0b_3x3/Relu") + << ConvolutionLayer( + 3U, 3U, 96U, + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name(param_path + "/Branch_2/Conv2d_0c_3x3/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_3x3_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0c_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0c_3x3/Relu"); SubStream i_d(graph); - i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, common_params.data_layout, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), - true)) - .set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool") - << ConvolutionLayer(1U, 1U, 96U, - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Relu"); + i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, common_params.data_layout, + PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)) + .set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool") + << ConvolutionLayer( + 1U, 1U, 96U, + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/BatchNorm") + << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Relu"); return ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)); } @@ -419,57 +470,65 @@ private: std::string total_path = "/cnn_data/inceptionv4_model/Mixed_6a_"; SubStream i_a(graph); - i_a << ConvolutionLayer(3U, 3U, 384U, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) - .set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/Relu"); + i_a << ConvolutionLayer( + 3U, 3U, 384U, + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), 0.001f) + .set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_6a/Branch_0/Conv2d_1a_3x3/Relu"); SubStream i_b(graph); - i_b << ConvolutionLayer(1U, 1U, 192U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(3U, 3U, 224U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) - .set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + 
"Branch_1_Conv2d_0b_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/Relu") - << ConvolutionLayer(3U, 3U, 256U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) - .set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/Relu"); + i_b << ConvolutionLayer( + 1U, 1U, 192U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), 0.001f) + .set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_6a/Branch_1/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 3U, 3U, 224U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_3x3_BatchNorm_beta.npy"), 0.001f) + .set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_6a/Branch_1/Conv2d_0b_3x3/Relu") + << ConvolutionLayer( + 3U, 3U, 256U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"), 0.001f) + .set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/BatchNorm") + << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_6a/Branch_1/Conv2d_1a_3x3/Relu"); SubStream i_c(graph); - i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), - true)) - .set_name("Mixed_6a/Branch_2/MaxPool_1a_3x3/MaxPool"); + i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true)) + .set_name("Mixed_6a/Branch_2/MaxPool_1a_3x3/MaxPool"); return ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c)); } @@ -479,125 +538,145 @@ private: std::string total_path = "/cnn_data/inceptionv4_model/" + param_path + "_"; SubStream i_a(graph); - i_a << ConvolutionLayer(1U, 1U, 384U, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu"); + i_a << ConvolutionLayer( + 1U, 1U, 384U, + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu"); SubStream i_b(graph); - i_b << ConvolutionLayer(1U, 1U, 192U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(7U, 1U, 224U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout), - 
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0)) - .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0b_1x7/Relu") - << ConvolutionLayer(1U, 7U, 256U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3)) - .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0c_7x1/Relu"); + i_b << ConvolutionLayer( + 1U, 1U, 192U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 7U, 1U, 224U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0)) + .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_0b_1x7/Relu") + << ConvolutionLayer( + 1U, 7U, 256U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 
3)) + .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_0c_7x1/Relu"); SubStream i_c(graph); - i_c << ConvolutionLayer(1U, 1U, 192U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(1U, 7U, 192U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3)) - .set_name(param_path + "/Branch_2/Conv2d_0b_7x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0b_7x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0b_7x1/Relu") - << ConvolutionLayer(7U, 1U, 224U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0)) - .set_name(param_path + "/Branch_2/Conv2d_0c_1x7/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0c_1x7/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0c_1x7/Relu") - << ConvolutionLayer(1U, 7U, 224U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3)) - .set_name(param_path + "/Branch_2/Conv2d_0d_7x1/Conv2D") - << 
BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0d_7x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0d_7x1/Relu") - << ConvolutionLayer(7U, 1U, 256U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0)) - .set_name(param_path + "/Branch_2/Conv2d_0e_1x7/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0e_1x7/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0e_1x7/Relu"); + i_c << ConvolutionLayer( + 1U, 1U, 192U, + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 1U, 7U, 192U, + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3)) + .set_name(param_path + "/Branch_2/Conv2d_0b_7x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_7x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0b_7x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0b_7x1/Relu") + << ConvolutionLayer( + 7U, 1U, 224U, + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0)) + .set_name(param_path + "/Branch_2/Conv2d_0c_1x7/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + 
"Branch_2_Conv2d_0c_1x7_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x7_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0c_1x7/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0c_1x7/Relu") + << ConvolutionLayer( + 1U, 7U, 224U, + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3)) + .set_name(param_path + "/Branch_2/Conv2d_0d_7x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_7x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0d_7x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0d_7x1/Relu") + << ConvolutionLayer( + 7U, 1U, 256U, + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0)) + .set_name(param_path + "/Branch_2/Conv2d_0e_1x7/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_1x7_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0e_1x7/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0e_1x7/Relu"); SubStream i_d(graph); - i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, common_params.data_layout, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), - true)) - .set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool") - << ConvolutionLayer(1U, 1U, 128U, - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Relu"); + i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, common_params.data_layout, + PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)) + .set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool") + << 
ConvolutionLayer( + 1U, 1U, 128U, + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Relu"); return ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)); } @@ -607,79 +686,91 @@ private: std::string total_path = "/cnn_data/inceptionv4_model/Mixed_7a_"; SubStream i_a(graph); - i_a << ConvolutionLayer(1U, 1U, 192U, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(3U, 3U, 192U, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) - .set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/Relu"); + i_a << ConvolutionLayer( + 1U, 1U, 192U, + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), 0.001f) + .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/BatchNorm") + << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 3U, 3U, 192U, + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_1a_3x3_BatchNorm_beta.npy"), 0.001f) + .set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_0/Conv2d_1a_3x3/Relu"); SubStream i_b(graph); - i_b << ConvolutionLayer(1U, 1U, 256U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/Relu") - << ConvolutionLayer(7U, 1U, 256U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0)) - .set_name("Mixed_7a/Branch_1/Conv2d_0b_1x7/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_beta.npy"), - 0.001f) - .set_name("Mixed_7a/Branch_1/Conv2d_0b_1x7/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_1/Conv2d_0b_1x7/Relu") - << ConvolutionLayer(1U, 7U, 320U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3)) - .set_name("Mixed_7a/Branch_1/Conv2d_0c_7x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_beta.npy"), - 0.001f) - .set_name("Mixed_7a/Branch_1/Conv2d_0c_7x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_1/Conv2d_0c_7x1/Relu") 
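Every hunk in this Inception-v4 section is the same mechanical re-wrap of one idiom: each branch of a block is a SubStream forked from the main Stream, filled with a ConvolutionLayer/BatchNormalizationLayer/ActivationLayer chain, and the branches are merged back with a ConcatLayer. One detail worth noting while reading: get_random_accessor(1.f, 1.f) in the gamma slot of every BatchNormalizationLayer degenerates to a constant fill of 1.0 (lower and upper bound coincide), presumably because the pretrained checkpoints ship no scale tensor. Below is a minimal sketch of the branch-and-concat idiom, not part of the patch: the two-branch layout, shapes and names are illustrative, and random accessors stand in for the .npy weight files.

// Minimal, self-contained sketch of the SubStream/ConcatLayer idiom used by
// the Inception-v4 blocks in this file. The two-branch layout, shapes and
// names are illustrative; random accessors stand in for the .npy weights.
#include "arm_compute/graph.h"
#include "utils/GraphUtils.h"

#include <memory>

using namespace arm_compute;
using namespace arm_compute::graph::frontend;
using namespace arm_compute::graph_utils;

int main()
{
    Stream graph(0, "TwoBranchBlock");

    // 56x56x64 float input in NCHW, batch of one.
    TensorDescriptor input_descriptor(TensorShape(56U, 56U, 64U, 1U), DataType::F32);

    graph << Target::NEON << InputLayer(input_descriptor, get_random_accessor(0.f, 1.f));

    // Each SubStream forks from the current tail of the main stream.
    SubStream i_a(graph);
    i_a << ConvolutionLayer(1U, 1U, 32U, get_random_accessor(0.f, 1.f),
                            std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                            PadStrideInfo(1, 1, 0, 0))
               .set_name("Branch_0/Conv2d_1x1")
        << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
               .set_name("Branch_0/Relu");

    SubStream i_b(graph);
    i_b << ConvolutionLayer(3U, 3U, 32U, get_random_accessor(0.f, 1.f),
                            std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                            PadStrideInfo(1, 1, 1, 1))
               .set_name("Branch_1/Conv2d_3x3")
        << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU))
               .set_name("Branch_1/Relu");

    // Join the branches along the channel dimension and terminate the graph.
    graph << ConcatLayer(std::move(i_a), std::move(i_b)).set_name("concat")
          << OutputLayer(std::make_unique<DummyAccessor>());

    GraphConfig config;
    graph.finalize(Target::NEON, config);
    graph.run();
    return 0;
}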
- << ConvolutionLayer(3U, 3U, 320U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) - .set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"), - 0.001f) - .set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/Relu"); + i_b << ConvolutionLayer( + 1U, 1U, 256U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), 0.001f) + .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_1/Conv2d_0a_1x1/Relu") + << ConvolutionLayer( + 7U, 1U, 256U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 3, 0)) + .set_name("Mixed_7a/Branch_1/Conv2d_0b_1x7/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x7_BatchNorm_beta.npy"), 0.001f) + .set_name("Mixed_7a/Branch_1/Conv2d_0b_1x7/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_1/Conv2d_0b_1x7/Relu") + << ConvolutionLayer( + 1U, 7U, 320U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 3)) + .set_name("Mixed_7a/Branch_1/Conv2d_0c_7x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_7x1_BatchNorm_beta.npy"), 0.001f) + .set_name("Mixed_7a/Branch_1/Conv2d_0c_7x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_1/Conv2d_0c_7x1/Relu") + << ConvolutionLayer( + 3U, 3U, 320U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_weights.npy", weights_layout), + 
std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 0)) + .set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_1a_3x3_BatchNorm_beta.npy"), 0.001f) + .set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Mixed_7a/Branch_1/Conv2d_1a_3x3/Relu"); SubStream i_c(graph); - i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), - true)) - .set_name("Mixed_7a/Branch_2/MaxPool_1a_3x3/MaxPool"); + i_c << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, common_params.data_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL), true)) + .set_name("Mixed_7a/Branch_2/MaxPool_1a_3x3/MaxPool"); return ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c)); } @@ -689,163 +780,163 @@ private: std::string total_path = "/cnn_data/inceptionv4_model/" + param_path + "_"; SubStream i_a(graph); - i_a << ConvolutionLayer(1U, 1U, 256U, - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu"); + i_a << ConvolutionLayer( + 1U, 1U, 256U, + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_0_Conv2d_0a_1x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_0/Conv2d_0a_1x1/Relu"); SubStream i_b(graph); i_b << ConvolutionLayer( - 1U, 1U, 384U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Conv2D") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - 
get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu"); + 1U, 1U, 384U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0a_1x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_0a_1x1/Relu"); SubStream i_b1(i_b); i_b1 << ConvolutionLayer( - 3U, 1U, 256U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 0)) - .set_name(param_path + "/Branch_1/Conv2d_0b_1x3/Conv2D") + 3U, 1U, 256U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 0)) + .set_name(param_path + "/Branch_1/Conv2d_0b_1x3/Conv2D") << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0b_1x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0b_1x3/Relu"); + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0b_1x3_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0b_1x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_0b_1x3/Relu"); SubStream i_b2(i_b); i_b2 << ConvolutionLayer( - 1U, 3U, 256U, - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 1)) - .set_name(param_path + "/Branch_1/Conv2d_0c_3x1/Conv2D") + 1U, 3U, 256U, + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 1)) + .set_name(param_path + 
"/Branch_1/Conv2d_0c_3x1/Conv2D") << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_1/Conv2d_0c_3x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_1/Conv2d_0c_3x1/Relu"); + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + total_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_1_Conv2d_0c_3x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_1/Conv2d_0c_3x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_1/Conv2d_0c_3x1/Relu"); // Merge b1 and b2 i_b << ConcatLayer(std::move(i_b1), std::move(i_b2)).set_name(param_path + "/Branch_1/concat"); SubStream i_c(graph); i_c << ConvolutionLayer( - 1U, 1U, 384U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Conv2D") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu") + 1U, 1U, 384U, + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0a_1x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0a_1x1/Relu") << ConvolutionLayer( - 1U, 3U, 448U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 1)) - .set_name(param_path + "/Branch_2/Conv2d_0b_3x1/Conv2D") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x1_BatchNorm_moving_variance.npy"), - 
get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0b_3x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0b_3x1/Relu") + 1U, 3U, 448U, + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 1)) + .set_name(param_path + "/Branch_2/Conv2d_0b_3x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0b_3x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0b_3x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0b_3x1/Relu") << ConvolutionLayer( - 3U, 1U, 512U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 0)) - .set_name(param_path + "/Branch_2/Conv2d_0c_1x3/Conv2D") - << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0c_1x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0c_1x3/Relu"); + 3U, 1U, 512U, + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 0)) + .set_name(param_path + "/Branch_2/Conv2d_0c_1x3/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0c_1x3_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0c_1x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0c_1x3/Relu"); SubStream i_c1(i_c); i_c1 << ConvolutionLayer( - 3U, 1U, 256U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_1x3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 0)) - .set_name(param_path + "/Branch_2/Conv2d_0d_1x3/Conv2D") + 3U, 1U, 256U, + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_1x3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 0)) + .set_name(param_path + "/Branch_2/Conv2d_0d_1x3/Conv2D") << BatchNormalizationLayer( - get_weights_accessor(data_path, 
total_path + "Branch_2_Conv2d_0d_1x3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_1x3_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_1x3_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0d_1x3/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0d_1x3/Relu"); + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_1x3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + total_path + "Branch_2_Conv2d_0d_1x3_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0d_1x3_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0d_1x3/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0d_1x3/Relu"); SubStream i_c2(i_c); i_c2 << ConvolutionLayer( - 1U, 3U, 256U, - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 1)) - .set_name(param_path + "/Branch_2/Conv2d_0e_3x1/Conv2D") + 1U, 3U, 256U, + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 1)) + .set_name(param_path + "/Branch_2/Conv2d_0e_3x1/Conv2D") << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_2/Conv2d_0e_3x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_2/Conv2d_0e_3x1/Relu"); + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, + total_path + "Branch_2_Conv2d_0e_3x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_2_Conv2d_0e_3x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_2/Conv2d_0e_3x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_2/Conv2d_0e_3x1/Relu"); // Merge i_c1 and i_c2 i_c << ConcatLayer(std::move(i_c1), std::move(i_c2)).set_name(param_path + "/Branch_2/concat"); SubStream i_d(graph); - i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, common_params.data_layout, PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), - true)) - .set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool") - << ConvolutionLayer(1U, 1U, 256U, - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"), - 
get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), - get_random_accessor(1.f, 1.f), - get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"), - 0.001f) - .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Relu"); + i_d << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, common_params.data_layout, + PadStrideInfo(1, 1, 1, 1, DimensionRoundingType::CEIL), true)) + .set_name(param_path + "/Branch_3/AvgPool_0a_3x3/AvgPool") + << ConvolutionLayer( + 1U, 1U, 256U, + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_moving_variance.npy"), + get_random_accessor(1.f, 1.f), + get_weights_accessor(data_path, total_path + "Branch_3_Conv2d_0b_1x1_BatchNorm_beta.npy"), 0.001f) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/Branch_3/Conv2d_0b_1x1/Relu"); return ConcatLayer(std::move(i_a), std::move(i_b), std::move(i_c), std::move(i_d)); } diff --git a/examples/graph_lenet.cpp b/examples/graph_lenet.cpp index 7b475c2c03..7d6dce7b17 100644 --- a/examples/graph_lenet.cpp +++ b/examples/graph_lenet.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. 
*/ #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -35,8 +36,7 @@ using namespace arm_compute::graph_utils; class GraphLenetExample : public Example { public: - GraphLenetExample() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "LeNet") + GraphLenetExample() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "LeNet") { } bool do_setup(int argc, char **argv) override @@ -49,14 +49,15 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; } // Checks - ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "QASYMM8 not supported for this graph"); + ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), + "QASYMM8 not supported for this graph"); // Print parameter values std::cout << common_params << std::endl; @@ -67,43 +68,39 @@ public: // Create input descriptor const auto operation_layout = common_params.data_layout; - const TensorShape tensor_shape = permute_shape(TensorShape(28U, 28U, 1U, batches), DataLayout::NCHW, operation_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(28U, 28U, 1U, batches), DataLayout::NCHW, operation_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NCHW; //conv1 << pool1 << conv2 << pool2 << fc1 << act1 << fc2 << smx - graph << common_params.target - << common_params.fast_math_hint + graph << common_params.target << common_params.fast_math_hint << InputLayer(input_descriptor, get_input_accessor(common_params)) << ConvolutionLayer( - 5U, 5U, 20U, - get_weights_accessor(data_path, "/cnn_data/lenet_model/conv1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/lenet_model/conv1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv1") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))).set_name("pool1") + 5U, 5U, 20U, get_weights_accessor(data_path, "/cnn_data/lenet_model/conv1_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/lenet_model/conv1_b.npy"), PadStrideInfo(1, 1, 0, 0)) + .set_name("conv1") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))) + .set_name("pool1") << ConvolutionLayer( - 5U, 5U, 50U, - get_weights_accessor(data_path, "/cnn_data/lenet_model/conv2_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/lenet_model/conv2_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv2") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))).set_name("pool2") - << FullyConnectedLayer( - 500U, - get_weights_accessor(data_path, "/cnn_data/lenet_model/ip1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/lenet_model/ip1_b.npy")) - .set_name("ip1") + 5U, 5U, 50U, get_weights_accessor(data_path, "/cnn_data/lenet_model/conv2_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/lenet_model/conv2_b.npy"), PadStrideInfo(1, 1, 0, 0)) + .set_name("conv2") + << 
PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))) + .set_name("pool2") + << FullyConnectedLayer(500U, + get_weights_accessor(data_path, "/cnn_data/lenet_model/ip1_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/lenet_model/ip1_b.npy")) + .set_name("ip1") << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu") - << FullyConnectedLayer( - 10U, - get_weights_accessor(data_path, "/cnn_data/lenet_model/ip2_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/lenet_model/ip2_b.npy")) - .set_name("ip2") - << SoftmaxLayer().set_name("prob") - << OutputLayer(get_output_accessor(common_params)); + << FullyConnectedLayer(10U, + get_weights_accessor(data_path, "/cnn_data/lenet_model/ip2_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/lenet_model/ip2_b.npy")) + .set_name("ip2") + << SoftmaxLayer().set_name("prob") << OutputLayer(get_output_accessor(common_params)); // Finalize graph GraphConfig config; @@ -111,6 +108,7 @@ public: config.use_tuner = common_params.enable_tuner; config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; graph.finalize(common_params.target, config); @@ -131,6 +129,14 @@ private: /** Main program for LeNet * + * Model is based on: + * http://yann.lecun.com/exdb/publis/pdf/lecun-98.pdf + * "Gradient-Based Learning Applied to Document Recognition" + * Yann LeCun, Léon Bottou, Yoshua Bengio, and Patrick Haffner + * + * The original model uses tanh instead of ReLU activations. However, the use of ReLU activations in LeNet has been + * widely adopted to improve accuracy. + * * @note To list all the possible arguments execute the binary appended with the --help option * * @param[in] argc Number of arguments diff --git a/examples/graph_mnist.cpp b/examples/graph_mnist.cpp deleted file mode 100644 index 56d5c96282..0000000000 --- a/examples/graph_mnist.cpp +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright (c) 2019-2020 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE.
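Aside from re-wrapping, the one functional change in the LeNet hunk above is the new config.mlgo_file line: GraphConfig gains an MLGO heuristics file, forwarded from the common graph options before finalize(). A reduced sketch of that finalization step, with placeholder values rather than anything taken from the patch:

// Reduced sketch of the graph finalization step shared by these examples.
// File names below are placeholders; the real examples forward the values
// held in CommonGraphParams instead.
#include "arm_compute/graph.h"

using namespace arm_compute::graph;

void finalize_stream(frontend::Stream &stream)
{
    GraphConfig config;
    config.num_threads = 4;                 // CPU worker threads
    config.use_tuner   = false;             // OpenCL tuner toggle
    config.tuner_file  = "acl_tuner.csv";   // tuner output file (placeholder)
    config.mlgo_file   = "heuristics.mlgo"; // MLGO heuristics file, the field added by this patch (placeholder)

    stream.finalize(Target::NEON, config);
}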
- */ - -#include "arm_compute/graph.h" -#include "support/ToolchainSupport.h" -#include "utils/CommonGraphOptions.h" -#include "utils/GraphUtils.h" -#include "utils/Utils.h" - -using namespace arm_compute; -using namespace arm_compute::utils; -using namespace arm_compute::graph::frontend; -using namespace arm_compute::graph_utils; - -/** Example demonstrating how to implement Mnist's network using the Compute Library's graph API */ -class GraphMnistExample : public Example -{ -public: - GraphMnistExample() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "LeNet") - { - } - bool do_setup(int argc, char **argv) override - { - // Parse arguments - cmd_parser.parse(argc, argv); - cmd_parser.validate(); - - // Consume common parameters - common_params = consume_common_graph_parameters(common_opts); - - // Return when help menu is requested - if(common_params.help) - { - cmd_parser.print_help(argv[0]); - return false; - } - - // Print parameter values - std::cout << common_params << std::endl; - - // Get trainable parameters data path - std::string data_path = common_params.data_path; - - // Add model path to data path - if(!data_path.empty() && arm_compute::is_data_type_quantized_asymmetric(common_params.data_type)) - { - data_path += "/cnn_data/mnist_qasymm8_model/"; - } - - // Create input descriptor - const auto operation_layout = common_params.data_layout; - const TensorShape tensor_shape = permute_shape(TensorShape(28U, 28U, 1U), DataLayout::NCHW, operation_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); - - const QuantizationInfo in_quant_info = QuantizationInfo(0.003921568859368563f, 0); - - const std::vector<std::pair<QuantizationInfo, QuantizationInfo>> conv_quant_info = - { - { QuantizationInfo(0.004083447158336639f, 138), QuantizationInfo(0.0046257381327450275f, 0) }, // conv0 - { QuantizationInfo(0.0048590428195893764f, 149), QuantizationInfo(0.03558270260691643f, 0) }, // conv1 - { QuantizationInfo(0.004008443560451269f, 146), QuantizationInfo(0.09117382764816284f, 0) }, // conv2 - { QuantizationInfo(0.004344311077147722f, 160), QuantizationInfo(0.5494495034217834f, 167) }, // fc - }; - - // Set weights trained layout - const DataLayout weights_layout = DataLayout::NHWC; - FullyConnectedLayerInfo fc_info = FullyConnectedLayerInfo(); - fc_info.set_weights_trained_layout(weights_layout); - - graph << common_params.target - << common_params.fast_math_hint - << InputLayer(input_descriptor.set_quantization_info(in_quant_info), - get_input_accessor(common_params)) - << ConvolutionLayer( - 3U, 3U, 32U, - get_weights_accessor(data_path, "conv2d_weights_quant_FakeQuantWithMinMaxVars.npy", weights_layout), - get_weights_accessor(data_path, "conv2d_Conv2D_bias.npy"), - PadStrideInfo(1U, 1U, 1U, 1U), 1, conv_quant_info.at(0).first, conv_quant_info.at(0).second) - .set_name("Conv0") - - << ConvolutionLayer( - 3U, 3U, 32U, - get_weights_accessor(data_path, "conv2d_1_weights_quant_FakeQuantWithMinMaxVars.npy", weights_layout), - get_weights_accessor(data_path, "conv2d_1_Conv2D_bias.npy"), - PadStrideInfo(1U, 1U, 1U, 1U), 1, conv_quant_info.at(1).first, conv_quant_info.at(1).second) - .set_name("conv1") - - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))).set_name("maxpool1") - - << ConvolutionLayer( - 3U, 3U, 32U, - get_weights_accessor(data_path, "conv2d_2_weights_quant_FakeQuantWithMinMaxVars.npy", weights_layout), - 
get_weights_accessor(data_path, "conv2d_2_Conv2D_bias.npy"), - PadStrideInfo(1U, 1U, 1U, 1U), 1, conv_quant_info.at(2).first, conv_quant_info.at(2).second) - .set_name("conv2") - - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))).set_name("maxpool2") - - << FullyConnectedLayer( - 10U, - get_weights_accessor(data_path, "dense_weights_quant_FakeQuantWithMinMaxVars_transpose.npy", weights_layout), - get_weights_accessor(data_path, "dense_MatMul_bias.npy"), - fc_info, conv_quant_info.at(3).first, conv_quant_info.at(3).second) - .set_name("fc") - - << SoftmaxLayer().set_name("prob"); - - if(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type)) - { - graph << DequantizationLayer().set_name("dequantize"); - } - - graph << OutputLayer(get_output_accessor(common_params, 5)); - - // Finalize graph - GraphConfig config; - config.num_threads = common_params.threads; - config.use_tuner = common_params.enable_tuner; - config.tuner_mode = common_params.tuner_mode; - config.tuner_file = common_params.tuner_file; - - graph.finalize(common_params.target, config); - - return true; - } - void do_run() override - { - // Run graph - graph.run(); - } - -private: - CommandLineParser cmd_parser; - CommonGraphOptions common_opts; - CommonGraphParams common_params; - Stream graph; -}; - -/** Main program for Mnist Example - * - * @note To list all the possible arguments execute the binary appended with the --help option - * - * @param[in] argc Number of arguments - * @param[in] argv Arguments - */ -int main(int argc, char **argv) -{ - return arm_compute::utils::run_example<GraphMnistExample>(argc, argv); -} diff --git a/examples/graph_mobilenet.cpp b/examples/graph_mobilenet.cpp index bb893998be..e3a6ef116d 100644 --- a/examples/graph_mobilenet.cpp +++ b/examples/graph_mobilenet.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. */ #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -36,14 +37,13 @@ using namespace arm_compute::graph_utils; class GraphMobilenetExample : public Example { public: - GraphMobilenetExample() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "MobileNetV1") + GraphMobilenetExample() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "MobileNetV1") { // Add model id option model_id_opt = cmd_parser.add_option<SimpleOption<int>>("model-id", 0); model_id_opt->set_help("Mobilenet model id (0: 1.0_224, else: 0.75_160"); } - GraphMobilenetExample(const GraphMobilenetExample &) = delete; + GraphMobilenetExample(const GraphMobilenetExample &) = delete; GraphMobilenetExample &operator=(const GraphMobilenetExample &) = delete; ~GraphMobilenetExample() override = default; bool do_setup(int argc, char **argv) override @@ -56,7 +56,7 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; @@ -72,15 +72,17 @@ public: unsigned int spatial_size = (model_id == 0 || common_params.data_type == DataType::QASYMM8) ? 
224 : 160; // Create input descriptor - const TensorShape tensor_shape = permute_shape(TensorShape(spatial_size, spatial_size, 3U, 1U), DataLayout::NCHW, common_params.data_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(spatial_size, spatial_size, 3U, common_params.batches), DataLayout::NCHW, + common_params.data_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); // Set graph hints - graph << common_params.target - << common_params.fast_math_hint; + graph << common_params.target << common_params.fast_math_hint; // Create core graph - if(arm_compute::is_data_type_float(common_params.data_type)) + if (arm_compute::is_data_type_float(common_params.data_type)) { create_graph_float(input_descriptor, model_id); } @@ -90,8 +92,7 @@ public: } // Create common tail - graph << ReshapeLayer(TensorShape(1001U)).set_name("Reshape") - << SoftmaxLayer().set_name("Softmax") + graph << ReshapeLayer(TensorShape(1001U)).set_name("Reshape") << SoftmaxLayer().set_name("Softmax") << OutputLayer(get_output_accessor(common_params, 5)); // Finalize graph @@ -100,6 +101,7 @@ public: config.use_tuner = common_params.enable_tuner; config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; graph.finalize(common_params.target, config); @@ -114,63 +116,85 @@ public: private: CommandLineParser cmd_parser; CommonGraphOptions common_opts; - SimpleOption<int> *model_id_opt{ nullptr }; + SimpleOption<int> *model_id_opt{nullptr}; CommonGraphParams common_params; Stream graph; void create_graph_float(TensorDescriptor &input_descriptor, int model_id) { float depth_scale = (model_id == 0) ? 1.f : 0.75; - std::string model_path = (model_id == 0) ? "/cnn_data/mobilenet_v1_1_224_model/" : "/cnn_data/mobilenet_v1_075_160_model/"; + std::string model_path = + (model_id == 0) ? 
"/cnn_data/mobilenet_v1_1_224_model/" : "/cnn_data/mobilenet_v1_075_160_model/"; // Create a preprocessor object - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<TFPreproccessor>(); + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<TFPreproccessor>(); // Get trainable parameters data path std::string data_path = common_params.data_path; // Add model path to data path - if(!data_path.empty()) + if (!data_path.empty()) { data_path += model_path; } - graph << InputLayer(input_descriptor, - get_input_accessor(common_params, std::move(preprocessor), false)) - << ConvolutionLayer( - 3U, 3U, 32U * depth_scale, - get_weights_accessor(data_path, "Conv2d_0_weights.npy", DataLayout::NCHW), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)) - .set_name("Conv2d_0") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "Conv2d_0_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "Conv2d_0_BatchNorm_moving_variance.npy"), - get_weights_accessor(data_path, "Conv2d_0_BatchNorm_gamma.npy"), - get_weights_accessor(data_path, "Conv2d_0_BatchNorm_beta.npy"), - 0.001f) - .set_name("Conv2d_0/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)).set_name("Conv2d_0/Relu6"); - graph << get_dwsc_node_float(data_path, "Conv2d_1", 64 * depth_scale, PadStrideInfo(1, 1, 1, 1), PadStrideInfo(1, 1, 0, 0)); - graph << get_dwsc_node_float(data_path, "Conv2d_2", 128 * depth_scale, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); - graph << get_dwsc_node_float(data_path, "Conv2d_3", 128 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); - graph << get_dwsc_node_float(data_path, "Conv2d_4", 256 * depth_scale, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); - graph << get_dwsc_node_float(data_path, "Conv2d_5", 256 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); - graph << get_dwsc_node_float(data_path, "Conv2d_6", 512 * depth_scale, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); - graph << get_dwsc_node_float(data_path, "Conv2d_7", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); - graph << get_dwsc_node_float(data_path, "Conv2d_8", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); - graph << get_dwsc_node_float(data_path, "Conv2d_9", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); - graph << get_dwsc_node_float(data_path, "Conv2d_10", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); - graph << get_dwsc_node_float(data_path, "Conv2d_11", 512 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); - graph << get_dwsc_node_float(data_path, "Conv2d_12", 1024 * depth_scale, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); - graph << get_dwsc_node_float(data_path, "Conv2d_13", 1024 * depth_scale, PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), PadStrideInfo(1, 1, 0, 0)); - graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 
common_params.data_layout)).set_name("Logits/AvgPool_1a") - << ConvolutionLayer( - 1U, 1U, 1001U, - get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_weights.npy", DataLayout::NCHW), - get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_biases.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Logits/Conv2d_1c_1x1"); + graph << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), false)) + << ConvolutionLayer(3U, 3U, 32U * depth_scale, + get_weights_accessor(data_path, "Conv2d_0_weights.npy", DataLayout::NCHW), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)) + .set_name("Conv2d_0") + << BatchNormalizationLayer(get_weights_accessor(data_path, "Conv2d_0_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "Conv2d_0_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, "Conv2d_0_BatchNorm_gamma.npy"), + get_weights_accessor(data_path, "Conv2d_0_BatchNorm_beta.npy"), 0.001f) + .set_name("Conv2d_0/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)) + .set_name("Conv2d_0/Relu6"); + graph << get_dwsc_node_float(data_path, "Conv2d_1", 64 * depth_scale, PadStrideInfo(1, 1, 1, 1), + PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node_float(data_path, "Conv2d_2", 128 * depth_scale, + PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), + PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node_float(data_path, "Conv2d_3", 128 * depth_scale, + PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), + PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node_float(data_path, "Conv2d_4", 256 * depth_scale, + PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), + PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node_float(data_path, "Conv2d_5", 256 * depth_scale, + PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), + PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node_float(data_path, "Conv2d_6", 512 * depth_scale, + PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), + PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node_float(data_path, "Conv2d_7", 512 * depth_scale, + PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), + PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node_float(data_path, "Conv2d_8", 512 * depth_scale, + PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), + PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node_float(data_path, "Conv2d_9", 512 * depth_scale, + PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), + PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node_float(data_path, "Conv2d_10", 512 * depth_scale, + PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), + PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node_float(data_path, "Conv2d_11", 512 * depth_scale, + PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), + PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node_float(data_path, "Conv2d_12", 1024 * depth_scale, + PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), + PadStrideInfo(1, 1, 0, 0)); + graph << get_dwsc_node_float(data_path, "Conv2d_13", 1024 * depth_scale, + PadStrideInfo(1, 1, 1, 1, 1, 1, DimensionRoundingType::CEIL), + PadStrideInfo(1, 1, 0, 0)); + graph + << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, common_params.data_layout)).set_name("Logits/AvgPool_1a") + << ConvolutionLayer( + 1U, 1U, 1001U, get_weights_accessor(data_path, 
"Logits_Conv2d_1c_1x1_weights.npy", DataLayout::NCHW), + get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_biases.npy"), PadStrideInfo(1, 1, 0, 0)) + .set_name("Logits/Conv2d_1c_1x1"); } void create_graph_qasymm(TensorDescriptor &input_descriptor) @@ -179,7 +203,7 @@ private: std::string data_path = common_params.data_path; // Add model path to data path - if(!data_path.empty()) + if (!data_path.empty()) { data_path += "/cnn_data/mobilenet_qasymm8_model/"; } @@ -187,19 +211,16 @@ private: // Quantization info taken from the AndroidNN QASYMM8 MobileNet example const QuantizationInfo in_quant_info = QuantizationInfo(0.0078125f, 128); - const std::vector<QuantizationInfo> conv_weights_quant_info = - { + const std::vector<QuantizationInfo> conv_weights_quant_info = { QuantizationInfo(0.02182667888700962f, 151), // conv0 QuantizationInfo(0.004986600950360298f, 74) // conv14 }; - const std::vector<QuantizationInfo> conv_out_quant_info = - { + const std::vector<QuantizationInfo> conv_out_quant_info = { QuantizationInfo(0.023528477177023888f, 0), // conv0 QuantizationInfo(0.16609922051429749f, 66) // conv14 }; - const std::vector<QuantizationInfo> depth_weights_quant_info = - { + const std::vector<QuantizationInfo> depth_weights_quant_info = { QuantizationInfo(0.29219913482666016f, 110), // dwsc1 QuantizationInfo(0.40277284383773804f, 130), // dwsc2 QuantizationInfo(0.06053730100393295f, 160), // dwsc3 @@ -215,8 +236,7 @@ private: QuantizationInfo(0.12616927921772003f, 211) // dwsc13 }; - const std::vector<QuantizationInfo> point_weights_quant_info = - { + const std::vector<QuantizationInfo> point_weights_quant_info = { QuantizationInfo(0.030420949682593346f, 121), // dwsc1 QuantizationInfo(0.015148180536925793f, 104), // dwsc2 QuantizationInfo(0.013755458407104015f, 94), // dwsc3 @@ -234,108 +254,121 @@ private: graph << InputLayer(input_descriptor.set_quantization_info(in_quant_info), get_input_accessor(common_params, nullptr, false)) - << ConvolutionLayer( - 3U, 3U, 32U, - get_weights_accessor(data_path, "Conv2d_0_weights.npy"), - get_weights_accessor(data_path, "Conv2d_0_bias.npy"), - PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), - 1, conv_weights_quant_info.at(0), conv_out_quant_info.at(0)) - .set_name("Conv2d_0") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)).set_name("Conv2d_0/Relu6"); - graph << get_dwsc_node_qasymm(data_path, "Conv2d_1", 64U, PadStrideInfo(1U, 1U, 1U, 1U), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(0), point_weights_quant_info.at(0)); - graph << get_dwsc_node_qasymm(data_path, "Conv2d_2", 128U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(1), - point_weights_quant_info.at(1)); - graph << get_dwsc_node_qasymm(data_path, "Conv2d_3", 128U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(2), - point_weights_quant_info.at(2)); - graph << get_dwsc_node_qasymm(data_path, "Conv2d_4", 256U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(3), - point_weights_quant_info.at(3)); - graph << get_dwsc_node_qasymm(data_path, "Conv2d_5", 256U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(4), - point_weights_quant_info.at(4)); - graph << get_dwsc_node_qasymm(data_path, 
"Conv2d_6", 512U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(5), - point_weights_quant_info.at(5)); - graph << get_dwsc_node_qasymm(data_path, "Conv2d_7", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(6), - point_weights_quant_info.at(6)); - graph << get_dwsc_node_qasymm(data_path, "Conv2d_8", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(7), - point_weights_quant_info.at(7)); - graph << get_dwsc_node_qasymm(data_path, "Conv2d_9", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(8), - point_weights_quant_info.at(8)); - graph << get_dwsc_node_qasymm(data_path, "Conv2d_10", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(9), - point_weights_quant_info.at(9)); - graph << get_dwsc_node_qasymm(data_path, "Conv2d_11", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(10), - point_weights_quant_info.at(10)); - graph << get_dwsc_node_qasymm(data_path, "Conv2d_12", 1024U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(11), - point_weights_quant_info.at(11)); - graph << get_dwsc_node_qasymm(data_path, "Conv2d_13", 1024U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(12), - point_weights_quant_info.at(12)) - << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, common_params.data_layout)).set_name("Logits/AvgPool_1a") - << ConvolutionLayer( - 1U, 1U, 1001U, - get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_weights.npy"), - get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_bias.npy"), - PadStrideInfo(1U, 1U, 0U, 0U), 1, conv_weights_quant_info.at(1), conv_out_quant_info.at(1)) - .set_name("Logits/Conv2d_1c_1x1"); + << ConvolutionLayer(3U, 3U, 32U, get_weights_accessor(data_path, "Conv2d_0_weights.npy"), + get_weights_accessor(data_path, "Conv2d_0_bias.npy"), + PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), 1, + conv_weights_quant_info.at(0), conv_out_quant_info.at(0)) + .set_name("Conv2d_0") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)) + .set_name("Conv2d_0/Relu6"); + graph << get_dwsc_node_qasymm(data_path, "Conv2d_1", 64U, PadStrideInfo(1U, 1U, 1U, 1U), + PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(0), + point_weights_quant_info.at(0)); + graph << get_dwsc_node_qasymm( + data_path, "Conv2d_2", 128U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), + PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(1), point_weights_quant_info.at(1)); + graph << get_dwsc_node_qasymm( + data_path, "Conv2d_3", 128U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), + PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(2), point_weights_quant_info.at(2)); + graph << get_dwsc_node_qasymm( + data_path, "Conv2d_4", 256U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), + PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(3), point_weights_quant_info.at(3)); + graph << 
get_dwsc_node_qasymm( + data_path, "Conv2d_5", 256U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), + PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(4), point_weights_quant_info.at(4)); + graph << get_dwsc_node_qasymm( + data_path, "Conv2d_6", 512U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), + PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(5), point_weights_quant_info.at(5)); + graph << get_dwsc_node_qasymm( + data_path, "Conv2d_7", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), + PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(6), point_weights_quant_info.at(6)); + graph << get_dwsc_node_qasymm( + data_path, "Conv2d_8", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), + PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(7), point_weights_quant_info.at(7)); + graph << get_dwsc_node_qasymm( + data_path, "Conv2d_9", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), + PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(8), point_weights_quant_info.at(8)); + graph << get_dwsc_node_qasymm( + data_path, "Conv2d_10", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), + PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(9), point_weights_quant_info.at(9)); + graph << get_dwsc_node_qasymm( + data_path, "Conv2d_11", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), + PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(10), point_weights_quant_info.at(10)); + graph << get_dwsc_node_qasymm( + data_path, "Conv2d_12", 1024U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), + PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(11), point_weights_quant_info.at(11)); + graph + << get_dwsc_node_qasymm( + data_path, "Conv2d_13", 1024U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::FLOOR), + PadStrideInfo(1U, 1U, 0U, 0U), depth_weights_quant_info.at(12), point_weights_quant_info.at(12)) + << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, common_params.data_layout)).set_name("Logits/AvgPool_1a") + << ConvolutionLayer(1U, 1U, 1001U, get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_weights.npy"), + get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_bias.npy"), + PadStrideInfo(1U, 1U, 0U, 0U), 1, conv_weights_quant_info.at(1), + conv_out_quant_info.at(1)) + .set_name("Logits/Conv2d_1c_1x1"); } - ConcatLayer get_dwsc_node_float(const std::string &data_path, std::string &&param_path, - unsigned int conv_filt, - PadStrideInfo dwc_pad_stride_info, PadStrideInfo conv_pad_stride_info) + ConcatLayer get_dwsc_node_float(const std::string &data_path, + std::string &&param_path, + unsigned int conv_filt, + PadStrideInfo dwc_pad_stride_info, + PadStrideInfo conv_pad_stride_info) { std::string total_path = param_path + "_"; SubStream sg(graph); sg << DepthwiseConvolutionLayer( - 3U, 3U, - get_weights_accessor(data_path, total_path + "depthwise_depthwise_weights.npy", DataLayout::NCHW), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - dwc_pad_stride_info) - .set_name(total_path + "depthwise/depthwise") + 3U, 3U, + get_weights_accessor(data_path, total_path + "depthwise_depthwise_weights.npy", DataLayout::NCHW), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), dwc_pad_stride_info) + .set_name(total_path + "depthwise/depthwise") << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + 
"depthwise_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_moving_variance.npy"), - get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_gamma.npy"), - get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_beta.npy"), - 0.001f) - .set_name(total_path + "depthwise/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)).set_name(total_path + "depthwise/Relu6") - << ConvolutionLayer( - 1U, 1U, conv_filt, - get_weights_accessor(data_path, total_path + "pointwise_weights.npy", DataLayout::NCHW), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - conv_pad_stride_info) - .set_name(total_path + "pointwise/Conv2D") + get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_gamma.npy"), + get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_beta.npy"), 0.001f) + .set_name(total_path + "depthwise/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)) + .set_name(total_path + "depthwise/Relu6") + << ConvolutionLayer(1U, 1U, conv_filt, + get_weights_accessor(data_path, total_path + "pointwise_weights.npy", DataLayout::NCHW), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), conv_pad_stride_info) + .set_name(total_path + "pointwise/Conv2D") << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "pointwise_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "pointwise_BatchNorm_moving_variance.npy"), - get_weights_accessor(data_path, total_path + "pointwise_BatchNorm_gamma.npy"), - get_weights_accessor(data_path, total_path + "pointwise_BatchNorm_beta.npy"), - 0.001f) - .set_name(total_path + "pointwise/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)).set_name(total_path + "pointwise/Relu6"); + get_weights_accessor(data_path, total_path + "pointwise_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "pointwise_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, total_path + "pointwise_BatchNorm_gamma.npy"), + get_weights_accessor(data_path, total_path + "pointwise_BatchNorm_beta.npy"), 0.001f) + .set_name(total_path + "pointwise/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)) + .set_name(total_path + "pointwise/Relu6"); return ConcatLayer(std::move(sg)); } - ConcatLayer get_dwsc_node_qasymm(const std::string &data_path, std::string &&param_path, + ConcatLayer get_dwsc_node_qasymm(const std::string &data_path, + std::string &&param_path, const unsigned int conv_filt, - PadStrideInfo dwc_pad_stride_info, PadStrideInfo conv_pad_stride_info, - QuantizationInfo depth_weights_quant_info, QuantizationInfo point_weights_quant_info) + PadStrideInfo dwc_pad_stride_info, + PadStrideInfo conv_pad_stride_info, + QuantizationInfo depth_weights_quant_info, + QuantizationInfo point_weights_quant_info) { std::string total_path = param_path + "_"; SubStream sg(graph); - sg << DepthwiseConvolutionLayer( - 3U, 3U, - get_weights_accessor(data_path, total_path + "depthwise_weights.npy"), - get_weights_accessor(data_path, total_path + "depthwise_bias.npy"), - dwc_pad_stride_info, 1, 
std::move(depth_weights_quant_info)) - .set_name(total_path + "depthwise/depthwise") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)).set_name(total_path + "depthwise/Relu6") - << ConvolutionLayer( - 1U, 1U, conv_filt, - get_weights_accessor(data_path, total_path + "pointwise_weights.npy"), - get_weights_accessor(data_path, total_path + "pointwise_bias.npy"), - conv_pad_stride_info, 1, std::move(point_weights_quant_info)) - .set_name(total_path + "pointwise/Conv2D") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)).set_name(total_path + "pointwise/Relu6"); + sg << DepthwiseConvolutionLayer(3U, 3U, get_weights_accessor(data_path, total_path + "depthwise_weights.npy"), + get_weights_accessor(data_path, total_path + "depthwise_bias.npy"), + dwc_pad_stride_info, 1, std::move(depth_weights_quant_info)) + .set_name(total_path + "depthwise/depthwise") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)) + .set_name(total_path + "depthwise/Relu6") + << ConvolutionLayer(1U, 1U, conv_filt, get_weights_accessor(data_path, total_path + "pointwise_weights.npy"), + get_weights_accessor(data_path, total_path + "pointwise_bias.npy"), conv_pad_stride_info, + 1, std::move(point_weights_quant_info)) + .set_name(total_path + "pointwise/Conv2D") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)) + .set_name(total_path + "pointwise/Relu6"); return ConcatLayer(std::move(sg)); } diff --git a/examples/graph_mobilenet_v2.cpp b/examples/graph_mobilenet_v2.cpp index 0d6b4715c9..9bc21c42c5 100644 --- a/examples/graph_mobilenet_v2.cpp +++ b/examples/graph_mobilenet_v2.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. 
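The QuantizationInfo(scale, offset) tables used throughout the QASYMM8 path above follow the usual asymmetric 8-bit scheme: a real value r is stored as an unsigned byte q with r = scale * (q - offset). A standalone sketch of that mapping, using the in_quant_info values from this example; the helper functions below are illustrative, not library code:

#include <algorithm>
#include <cmath>
#include <cstdint>
#include <cstdio>

// Illustrative only: the QASYMM8 real <-> uint8 mapping implied by the
// QuantizationInfo(scale, offset) pairs in these examples.
static std::uint8_t quantize_qasymm8(float r, float scale, std::int32_t offset)
{
    const std::int32_t q = static_cast<std::int32_t>(std::lround(r / scale)) + offset;
    return static_cast<std::uint8_t>(std::min(255, std::max(0, q)));
}

static float dequantize_qasymm8(std::uint8_t q, float scale, std::int32_t offset)
{
    return scale * (static_cast<std::int32_t>(q) - offset);
}

int main()
{
    // in_quant_info is QuantizationInfo(0.0078125f, 128): q in [0, 255]
    // covers roughly [-1.0f, 0.9921875f] in steps of 1/128.
    const float        scale  = 0.0078125f;
    const std::int32_t offset = 128;
    std::printf("q=0   -> r=%f\n", dequantize_qasymm8(0, scale, offset));   // -1.0
    std::printf("q=255 -> r=%f\n", dequantize_qasymm8(255, scale, offset)); //  0.9921875
    std::printf("r=0.5 -> q=%d\n", static_cast<int>(quantize_qasymm8(0.5f, scale, offset))); // 192
    return 0;
}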
*/ #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -36,11 +37,10 @@ using namespace arm_compute::graph_utils; class GraphMobilenetV2Example : public Example { public: - GraphMobilenetV2Example() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "MobileNetV2") + GraphMobilenetV2Example() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "MobileNetV2") { } - GraphMobilenetV2Example(const GraphMobilenetV2Example &) = delete; + GraphMobilenetV2Example(const GraphMobilenetV2Example &) = delete; GraphMobilenetV2Example &operator=(const GraphMobilenetV2Example &) = delete; ~GraphMobilenetV2Example() override = default; @@ -54,7 +54,7 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; @@ -64,15 +64,16 @@ public: std::cout << common_params << std::endl; // Create input descriptor - const TensorShape tensor_shape = permute_shape(TensorShape(224U, 224U, 3U, 1U), DataLayout::NCHW, common_params.data_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); + const TensorShape tensor_shape = permute_shape(TensorShape(224U, 224U, 3U, common_params.batches), + DataLayout::NCHW, common_params.data_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); // Set graph hints - graph << common_params.target - << common_params.fast_math_hint; + graph << common_params.target << common_params.fast_math_hint; // Create core graph - if(arm_compute::is_data_type_float(common_params.data_type)) + if (arm_compute::is_data_type_float(common_params.data_type)) { create_graph_float(input_descriptor); } @@ -82,8 +83,7 @@ public: } // Create common tail graph << ReshapeLayer(TensorShape(1001U)).set_name("Predictions/Reshape") - << SoftmaxLayer().set_name("Predictions/Softmax") - << OutputLayer(get_output_accessor(common_params, 5)); + << SoftmaxLayer().set_name("Predictions/Softmax") << OutputLayer(get_output_accessor(common_params, 5)); // Finalize graph GraphConfig config; @@ -91,6 +91,7 @@ public: config.use_tuner = common_params.enable_tuner; config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; graph.finalize(common_params.target, config); @@ -129,129 +130,149 @@ private: const std::string model_path = "/cnn_data/mobilenet_v2_1.0_224_model/"; // Create a preprocessor object - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<TFPreproccessor>(); + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<TFPreproccessor>(); // Get trainable parameters data path std::string data_path = common_params.data_path; // Add model path to data path - if(!data_path.empty()) + if (!data_path.empty()) { data_path += model_path; } graph << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), false)) - << ConvolutionLayer(3U, 3U, 32U, - get_weights_accessor(data_path, "Conv_weights.npy", DataLayout::NCHW), + << ConvolutionLayer(3U, 3U, 32U, get_weights_accessor(data_path, "Conv_weights.npy", DataLayout::NCHW), std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 0, 1, 0, 1, 
DimensionRoundingType::CEIL)) - .set_name("Conv") + .set_name("Conv") << BatchNormalizationLayer(get_weights_accessor(data_path, "Conv_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, "Conv_BatchNorm_moving_variance.npy"), get_weights_accessor(data_path, "Conv_BatchNorm_gamma.npy"), get_weights_accessor(data_path, "Conv_BatchNorm_beta.npy"), 0.0010000000474974513f) - .set_name("Conv/BatchNorm") + .set_name("Conv/BatchNorm") << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)) - .set_name("Conv/Relu6"); + .set_name("Conv/Relu6"); get_expanded_conv_float(data_path, "expanded_conv", 32U, 16U, PadStrideInfo(1, 1, 1, 1)); - get_expanded_conv_float(data_path, "expanded_conv_1", 16U, 24U, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), HasExpand::Yes); - get_expanded_conv_float(data_path, "expanded_conv_2", 24U, 24U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, IsResidual::Yes); - get_expanded_conv_float(data_path, "expanded_conv_3", 24U, 32U, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), HasExpand::Yes); - get_expanded_conv_float(data_path, "expanded_conv_4", 32U, 32U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, IsResidual::Yes); - get_expanded_conv_float(data_path, "expanded_conv_5", 32U, 32U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, IsResidual::Yes); - get_expanded_conv_float(data_path, "expanded_conv_6", 32U, 64U, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), HasExpand::Yes); - get_expanded_conv_float(data_path, "expanded_conv_7", 64U, 64U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, IsResidual::Yes); - get_expanded_conv_float(data_path, "expanded_conv_8", 64U, 64U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, IsResidual::Yes); - get_expanded_conv_float(data_path, "expanded_conv_9", 64U, 64U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, IsResidual::Yes); + get_expanded_conv_float(data_path, "expanded_conv_1", 16U, 24U, + PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), HasExpand::Yes); + get_expanded_conv_float(data_path, "expanded_conv_2", 24U, 24U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, + IsResidual::Yes); + get_expanded_conv_float(data_path, "expanded_conv_3", 24U, 32U, + PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), HasExpand::Yes); + get_expanded_conv_float(data_path, "expanded_conv_4", 32U, 32U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, + IsResidual::Yes); + get_expanded_conv_float(data_path, "expanded_conv_5", 32U, 32U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, + IsResidual::Yes); + get_expanded_conv_float(data_path, "expanded_conv_6", 32U, 64U, + PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), HasExpand::Yes); + get_expanded_conv_float(data_path, "expanded_conv_7", 64U, 64U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, + IsResidual::Yes); + get_expanded_conv_float(data_path, "expanded_conv_8", 64U, 64U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, + IsResidual::Yes); + get_expanded_conv_float(data_path, "expanded_conv_9", 64U, 64U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, + IsResidual::Yes); get_expanded_conv_float(data_path, "expanded_conv_10", 64U, 96U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes); - get_expanded_conv_float(data_path, "expanded_conv_11", 96U, 96U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, IsResidual::Yes); - get_expanded_conv_float(data_path, "expanded_conv_12", 96U, 96U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, IsResidual::Yes); - get_expanded_conv_float(data_path, "expanded_conv_13", 96U, 160U, PadStrideInfo(2, 2, 0, 1, 
0, 1, DimensionRoundingType::CEIL), HasExpand::Yes); - get_expanded_conv_float(data_path, "expanded_conv_14", 160U, 160U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, IsResidual::Yes); - get_expanded_conv_float(data_path, "expanded_conv_15", 160U, 160U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, IsResidual::Yes); + get_expanded_conv_float(data_path, "expanded_conv_11", 96U, 96U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, + IsResidual::Yes); + get_expanded_conv_float(data_path, "expanded_conv_12", 96U, 96U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, + IsResidual::Yes); + get_expanded_conv_float(data_path, "expanded_conv_13", 96U, 160U, + PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), HasExpand::Yes); + get_expanded_conv_float(data_path, "expanded_conv_14", 160U, 160U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, + IsResidual::Yes); + get_expanded_conv_float(data_path, "expanded_conv_15", 160U, 160U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes, + IsResidual::Yes); get_expanded_conv_float(data_path, "expanded_conv_16", 160U, 320U, PadStrideInfo(1, 1, 1, 1), HasExpand::Yes); - graph << ConvolutionLayer(1U, 1U, 1280U, - get_weights_accessor(data_path, "Conv_1_weights.npy", DataLayout::NCHW), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("Conv_1") + graph << ConvolutionLayer( + 1U, 1U, 1280U, get_weights_accessor(data_path, "Conv_1_weights.npy", DataLayout::NCHW), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("Conv_1") << BatchNormalizationLayer(get_weights_accessor(data_path, "Conv_1_BatchNorm_moving_mean.npy"), get_weights_accessor(data_path, "Conv_1_BatchNorm_moving_variance.npy"), get_weights_accessor(data_path, "Conv_1_BatchNorm_gamma.npy"), get_weights_accessor(data_path, "Conv_1_BatchNorm_beta.npy"), 0.0010000000474974513f) - .set_name("Conv_1/BatchNorm") + .set_name("Conv_1/BatchNorm") << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)) - .set_name("Conv_1/Relu6") + .set_name("Conv_1/Relu6") << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, common_params.data_layout)).set_name("Logits/AvgPool") << ConvolutionLayer(1U, 1U, 1001U, get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_weights.npy", DataLayout::NCHW), get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_biases.npy"), PadStrideInfo(1, 1, 0, 0)) - .set_name("Logits/Conv2d_1c_1x1"); + .set_name("Logits/Conv2d_1c_1x1"); } - void get_expanded_conv_float(const std::string &data_path, std::string &&param_path, - unsigned int input_channels, unsigned int output_channels, - PadStrideInfo dwc_pad_stride_info, - HasExpand has_expand = HasExpand::No, IsResidual is_residual = IsResidual::No, - unsigned int expansion_size = 6) + void get_expanded_conv_float(const std::string &data_path, + std::string &&param_path, + unsigned int input_channels, + unsigned int output_channels, + PadStrideInfo dwc_pad_stride_info, + HasExpand has_expand = HasExpand::No, + IsResidual is_residual = IsResidual::No, + unsigned int expansion_size = 6) { std::string total_path = param_path + "_"; SubStream left(graph); // Add expand node - if(has_expand == HasExpand::Yes) + if (has_expand == HasExpand::Yes) { - left << ConvolutionLayer(1U, 1U, input_channels * expansion_size, - get_weights_accessor(data_path, total_path + "expand_weights.npy", DataLayout::NCHW), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/expand/Conv2D") - 
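get_expanded_conv_float, whose reflowed signature appears above, builds MobileNetV2's inverted-residual bottleneck: an optional 1x1 expand convolution (six-fold by default), a 3x3 depthwise convolution, and a linear 1x1 projection, with an elementwise Add back to the block input when stride and channel count are preserved. A trimmed sketch of that wiring, with null accessors in place of the .npy loaders and BatchNorm omitted; the helper name is illustrative:

#include "arm_compute/graph.h"

using namespace arm_compute;
using namespace arm_compute::graph::frontend;

// Illustrative only: the inverted-residual pattern behind get_expanded_conv_float.
// The projection is linear (no ReLU6 after it); the residual Add is taken only
// when the block keeps stride 1 and the same channel count.
void inverted_residual_sketch(Stream &graph, unsigned int in_ch, unsigned int out_ch,
                              PadStrideInfo dwc_info, bool with_residual, unsigned int expansion = 6)
{
    SubStream left(graph);
    left << ConvolutionLayer(1U, 1U, in_ch * expansion, // 1x1 expand
                             std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                             std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0))
         << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f))
         << DepthwiseConvolutionLayer(3U, 3U, // 3x3 depthwise
                                      std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                                      std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), dwc_info)
         << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f))
         << ConvolutionLayer(1U, 1U, out_ch, // 1x1 linear projection
                             std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                             std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0));
    if (with_residual)
    {
        SubStream right(graph); // identity branch from the block input
        graph << EltwiseLayer(std::move(left), std::move(right), EltwiseOperation::Add);
    }
    // Non-residual blocks simply continue the stream from `left`, as the example does.
}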
<< BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "expand_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "expand_BatchNorm_moving_variance.npy"), - get_weights_accessor(data_path, total_path + "expand_BatchNorm_gamma.npy"), - get_weights_accessor(data_path, total_path + "expand_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name(param_path + "/expand/BatchNorm") + left << ConvolutionLayer( + 1U, 1U, input_channels * expansion_size, + get_weights_accessor(data_path, total_path + "expand_weights.npy", DataLayout::NCHW), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/expand/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "expand_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "expand_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, total_path + "expand_BatchNorm_gamma.npy"), + get_weights_accessor(data_path, total_path + "expand_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name(param_path + "/expand/BatchNorm") << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)) - .set_name(param_path + "/expand/Relu6"); + .set_name(param_path + "/expand/Relu6"); } // Add depthwise node - left << DepthwiseConvolutionLayer(3U, 3U, - get_weights_accessor(data_path, total_path + "depthwise_depthwise_weights.npy", DataLayout::NCHW), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - dwc_pad_stride_info) - .set_name(param_path + "/depthwise/depthwise") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_moving_variance.npy"), - get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_gamma.npy"), - get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_beta.npy"), - 0.0010000000474974513f) - .set_name(param_path + "/depthwise/BatchNorm") + left << DepthwiseConvolutionLayer( + 3U, 3U, + get_weights_accessor(data_path, total_path + "depthwise_depthwise_weights.npy", DataLayout::NCHW), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), dwc_pad_stride_info) + .set_name(param_path + "/depthwise/depthwise") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_gamma.npy"), + get_weights_accessor(data_path, total_path + "depthwise_BatchNorm_beta.npy"), + 0.0010000000474974513f) + .set_name(param_path + "/depthwise/BatchNorm") << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)) - .set_name(param_path + "/depthwise/Relu6"); + .set_name(param_path + "/depthwise/Relu6"); // Add project node left << ConvolutionLayer(1U, 1U, output_channels, get_weights_accessor(data_path, total_path + "project_weights.npy", DataLayout::NCHW), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/project/Conv2D") - << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "project_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, total_path + "project_BatchNorm_moving_variance.npy"), - get_weights_accessor(data_path, total_path + 
"project_BatchNorm_gamma.npy"), - get_weights_accessor(data_path, total_path + "project_BatchNorm_beta.npy"), - 0.0010000000474974513) - .set_name(param_path + "/project/BatchNorm"); - - if(is_residual == IsResidual::Yes) + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/project/Conv2D") + << BatchNormalizationLayer( + get_weights_accessor(data_path, total_path + "project_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, total_path + "project_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, total_path + "project_BatchNorm_gamma.npy"), + get_weights_accessor(data_path, total_path + "project_BatchNorm_beta.npy"), 0.0010000000474974513) + .set_name(param_path + "/project/BatchNorm"); + + if (is_residual == IsResidual::Yes) { // Add residual node SubStream right(graph); - graph << EltwiseLayer(std::move(left), std::move(right), EltwiseOperation::Add).set_name(param_path + "/add"); + graph + << EltwiseLayer(std::move(left), std::move(right), EltwiseOperation::Add).set_name(param_path + "/add"); } else { @@ -268,7 +289,7 @@ private: std::string data_path = common_params.data_path; // Add model path to data path - if(!data_path.empty()) + if (!data_path.empty()) { data_path += model_path; } @@ -276,16 +297,14 @@ private: const QuantizationInfo in_quant_info = QuantizationInfo(0.0078125f, 128); const QuantizationInfo mid_quant_info = QuantizationInfo(0.023528477177023888f, 128); - const std::vector<QuantizationInfo> conv_weights_quant_info = - { + const std::vector<QuantizationInfo> conv_weights_quant_info = { QuantizationInfo(0.03396892547607422f, 122), // Conv QuantizationInfo(0.005167067516595125f, 125), // Conv1 QuantizationInfo(0.0016910821432247758f, 113) // Conv2d_1c_1x1 }; // Pointwise expand convolution quantization info - const std::vector<QuantizationInfo> pwc_q = - { + const std::vector<QuantizationInfo> pwc_q = { QuantizationInfo(0.254282623529f, 129), // expand_0 (Dummy) QuantizationInfo(0.009758507832884789f, 127), // expand_1 QuantizationInfo(0.0036556976847350597f, 144), // expand_2 @@ -305,8 +324,7 @@ private: QuantizationInfo(0.002046825597062707f, 135) // expand_16 }; // Depthwise expand convolution quantization info - const std::vector<QuantizationInfo> dwc_q = - { + const std::vector<QuantizationInfo> dwc_q = { QuantizationInfo(0.3436955213546753f, 165), // expand_0 QuantizationInfo(0.020969120785593987f, 109), // expand_1 QuantizationInfo(0.16981913149356842f, 52), // expand_2 @@ -326,8 +344,7 @@ private: QuantizationInfo(0.16456253826618195, 201) // expand_16 }; // Project convolution quantization info - const std::vector<QuantizationInfo> prwc_q = - { + const std::vector<QuantizationInfo> prwc_q = { QuantizationInfo(0.03737175464630127f, 140), // expand_0 QuantizationInfo(0.0225360207259655f, 156), // expand_1 QuantizationInfo(0.02740888111293316f, 122), // expand_2 @@ -349,65 +366,84 @@ private: graph << InputLayer(input_descriptor.set_quantization_info(in_quant_info), get_weights_accessor(data_path, common_params.image)) - << ConvolutionLayer( - 3U, 3U, 32U, - get_weights_accessor(data_path, "Conv_weights.npy"), - get_weights_accessor(data_path, "Conv_bias.npy"), - PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), - 1, conv_weights_quant_info.at(0), mid_quant_info) - .set_name("Conv") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)).set_name("Conv/Relu6") - << DepthwiseConvolutionLayer(3U, 3U, - 
get_weights_accessor(data_path, "expanded_conv_depthwise_depthwise_weights.npy"), - get_weights_accessor(data_path, "expanded_conv_depthwise_depthwise_biases.npy"), - PadStrideInfo(1, 1, 1, 1), 1, dwc_q.at(0)) - .set_name("expanded_conv/depthwise/depthwise") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)).set_name("expanded_conv/depthwise/Relu6") - << ConvolutionLayer(1U, 1U, 16U, - get_weights_accessor(data_path, "expanded_conv_project_weights.npy"), + << ConvolutionLayer(3U, 3U, 32U, get_weights_accessor(data_path, "Conv_weights.npy"), + get_weights_accessor(data_path, "Conv_bias.npy"), + PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::FLOOR), 1, + conv_weights_quant_info.at(0), mid_quant_info) + .set_name("Conv") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)) + .set_name("Conv/Relu6") + << DepthwiseConvolutionLayer( + 3U, 3U, get_weights_accessor(data_path, "expanded_conv_depthwise_depthwise_weights.npy"), + get_weights_accessor(data_path, "expanded_conv_depthwise_depthwise_biases.npy"), + PadStrideInfo(1, 1, 1, 1), 1, dwc_q.at(0)) + .set_name("expanded_conv/depthwise/depthwise") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)) + .set_name("expanded_conv/depthwise/Relu6") + << ConvolutionLayer(1U, 1U, 16U, get_weights_accessor(data_path, "expanded_conv_project_weights.npy"), get_weights_accessor(data_path, "expanded_conv_project_biases.npy"), PadStrideInfo(1, 1, 0, 0), 1, prwc_q.at(0)) - .set_name("expanded_conv/project/Conv2D"); - - get_expanded_conv_qasymm8(data_path, "expanded_conv_1", IsResidual::No, 96U, 24U, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), - pwc_q.at(1), dwc_q.at(1), prwc_q.at(1)); - get_expanded_conv_qasymm8(data_path, "expanded_conv_2", IsResidual::Yes, 144U, 24U, PadStrideInfo(1, 1, 1, 1), pwc_q.at(2), dwc_q.at(2), prwc_q.at(2)); - get_expanded_conv_qasymm8(data_path, "expanded_conv_3", IsResidual::No, 144U, 32U, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), - pwc_q.at(3), dwc_q.at(3), prwc_q.at(3)); - get_expanded_conv_qasymm8(data_path, "expanded_conv_4", IsResidual::Yes, 192U, 32U, PadStrideInfo(1, 1, 1, 1), pwc_q.at(4), dwc_q.at(4), prwc_q.at(4)); - get_expanded_conv_qasymm8(data_path, "expanded_conv_5", IsResidual::Yes, 192U, 32U, PadStrideInfo(1, 1, 1, 1), pwc_q.at(5), dwc_q.at(5), prwc_q.at(5)); - get_expanded_conv_qasymm8(data_path, "expanded_conv_6", IsResidual::No, 192U, 64U, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), - pwc_q.at(6), dwc_q.at(6), prwc_q.at(6)); - get_expanded_conv_qasymm8(data_path, "expanded_conv_7", IsResidual::Yes, 384U, 64U, PadStrideInfo(1, 1, 1, 1), pwc_q.at(7), dwc_q.at(7), prwc_q.at(7)); - get_expanded_conv_qasymm8(data_path, "expanded_conv_8", IsResidual::Yes, 384U, 64U, PadStrideInfo(1, 1, 1, 1), pwc_q.at(8), dwc_q.at(8), prwc_q.at(8)); - get_expanded_conv_qasymm8(data_path, "expanded_conv_9", IsResidual::Yes, 384U, 64U, PadStrideInfo(1, 1, 1, 1), pwc_q.at(9), dwc_q.at(9), prwc_q.at(9)); - get_expanded_conv_qasymm8(data_path, "expanded_conv_10", IsResidual::No, 384U, 96U, PadStrideInfo(1, 1, 1, 1), pwc_q.at(10), dwc_q.at(10), prwc_q.at(10)); - get_expanded_conv_qasymm8(data_path, "expanded_conv_11", IsResidual::Yes, 576U, 96U, PadStrideInfo(1, 1, 1, 1), pwc_q.at(11), dwc_q.at(11), prwc_q.at(11)); - get_expanded_conv_qasymm8(data_path, "expanded_conv_12", IsResidual::Yes, 576U, 96U, PadStrideInfo(1, 
1, 1, 1), pwc_q.at(12), dwc_q.at(12), prwc_q.at(12)); - get_expanded_conv_qasymm8(data_path, "expanded_conv_13", IsResidual::No, 576U, 160U, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), - pwc_q.at(13), dwc_q.at(13), prwc_q.at(13)); - get_expanded_conv_qasymm8(data_path, "expanded_conv_14", IsResidual::Yes, 960U, 160U, PadStrideInfo(1, 1, 1, 1), pwc_q.at(14), dwc_q.at(14), prwc_q.at(14)); - get_expanded_conv_qasymm8(data_path, "expanded_conv_15", IsResidual::Yes, 960U, 160U, PadStrideInfo(1, 1, 1, 1), pwc_q.at(15), dwc_q.at(15), prwc_q.at(15)); - get_expanded_conv_qasymm8(data_path, "expanded_conv_16", IsResidual::No, 960U, 320U, PadStrideInfo(1, 1, 1, 1), pwc_q.at(16), dwc_q.at(16), prwc_q.at(16)); - - graph << ConvolutionLayer(1U, 1U, 1280U, - get_weights_accessor(data_path, "Conv_1_weights.npy"), - get_weights_accessor(data_path, "Conv_1_biases.npy"), - PadStrideInfo(1, 1, 0, 0), 1, conv_weights_quant_info.at(1)) - .set_name("Conv_1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)).set_name("Conv_1/Relu6") + .set_name("expanded_conv/project/Conv2D"); + + get_expanded_conv_qasymm8(data_path, "expanded_conv_1", IsResidual::No, 96U, 24U, + PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), pwc_q.at(1), + dwc_q.at(1), prwc_q.at(1)); + get_expanded_conv_qasymm8(data_path, "expanded_conv_2", IsResidual::Yes, 144U, 24U, PadStrideInfo(1, 1, 1, 1), + pwc_q.at(2), dwc_q.at(2), prwc_q.at(2)); + get_expanded_conv_qasymm8(data_path, "expanded_conv_3", IsResidual::No, 144U, 32U, + PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), pwc_q.at(3), + dwc_q.at(3), prwc_q.at(3)); + get_expanded_conv_qasymm8(data_path, "expanded_conv_4", IsResidual::Yes, 192U, 32U, PadStrideInfo(1, 1, 1, 1), + pwc_q.at(4), dwc_q.at(4), prwc_q.at(4)); + get_expanded_conv_qasymm8(data_path, "expanded_conv_5", IsResidual::Yes, 192U, 32U, PadStrideInfo(1, 1, 1, 1), + pwc_q.at(5), dwc_q.at(5), prwc_q.at(5)); + get_expanded_conv_qasymm8(data_path, "expanded_conv_6", IsResidual::No, 192U, 64U, + PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), pwc_q.at(6), + dwc_q.at(6), prwc_q.at(6)); + get_expanded_conv_qasymm8(data_path, "expanded_conv_7", IsResidual::Yes, 384U, 64U, PadStrideInfo(1, 1, 1, 1), + pwc_q.at(7), dwc_q.at(7), prwc_q.at(7)); + get_expanded_conv_qasymm8(data_path, "expanded_conv_8", IsResidual::Yes, 384U, 64U, PadStrideInfo(1, 1, 1, 1), + pwc_q.at(8), dwc_q.at(8), prwc_q.at(8)); + get_expanded_conv_qasymm8(data_path, "expanded_conv_9", IsResidual::Yes, 384U, 64U, PadStrideInfo(1, 1, 1, 1), + pwc_q.at(9), dwc_q.at(9), prwc_q.at(9)); + get_expanded_conv_qasymm8(data_path, "expanded_conv_10", IsResidual::No, 384U, 96U, PadStrideInfo(1, 1, 1, 1), + pwc_q.at(10), dwc_q.at(10), prwc_q.at(10)); + get_expanded_conv_qasymm8(data_path, "expanded_conv_11", IsResidual::Yes, 576U, 96U, PadStrideInfo(1, 1, 1, 1), + pwc_q.at(11), dwc_q.at(11), prwc_q.at(11)); + get_expanded_conv_qasymm8(data_path, "expanded_conv_12", IsResidual::Yes, 576U, 96U, PadStrideInfo(1, 1, 1, 1), + pwc_q.at(12), dwc_q.at(12), prwc_q.at(12)); + get_expanded_conv_qasymm8(data_path, "expanded_conv_13", IsResidual::No, 576U, 160U, + PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::CEIL), pwc_q.at(13), + dwc_q.at(13), prwc_q.at(13)); + get_expanded_conv_qasymm8(data_path, "expanded_conv_14", IsResidual::Yes, 960U, 160U, PadStrideInfo(1, 1, 1, 1), + pwc_q.at(14), dwc_q.at(14), prwc_q.at(14)); + get_expanded_conv_qasymm8(data_path, "expanded_conv_15", 
IsResidual::Yes, 960U, 160U, PadStrideInfo(1, 1, 1, 1), + pwc_q.at(15), dwc_q.at(15), prwc_q.at(15)); + get_expanded_conv_qasymm8(data_path, "expanded_conv_16", IsResidual::No, 960U, 320U, PadStrideInfo(1, 1, 1, 1), + pwc_q.at(16), dwc_q.at(16), prwc_q.at(16)); + + graph << ConvolutionLayer(1U, 1U, 1280U, get_weights_accessor(data_path, "Conv_1_weights.npy"), + get_weights_accessor(data_path, "Conv_1_biases.npy"), PadStrideInfo(1, 1, 0, 0), 1, + conv_weights_quant_info.at(1)) + .set_name("Conv_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)) + .set_name("Conv_1/Relu6") << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, common_params.data_layout)).set_name("Logits/AvgPool") - << ConvolutionLayer(1U, 1U, 1001U, - get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_weights.npy"), + << ConvolutionLayer(1U, 1U, 1001U, get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_weights.npy"), get_weights_accessor(data_path, "Logits_Conv2d_1c_1x1_biases.npy"), PadStrideInfo(1, 1, 0, 0), 1, conv_weights_quant_info.at(2)) - .set_name("Logits/Conv2d_1c_1x1"); + .set_name("Logits/Conv2d_1c_1x1"); } - void get_expanded_conv_qasymm8(const std::string &data_path, std::string &&param_path, IsResidual is_residual, - unsigned int input_channels, unsigned int output_channels, + void get_expanded_conv_qasymm8(const std::string &data_path, + std::string &&param_path, + IsResidual is_residual, + unsigned int input_channels, + unsigned int output_channels, PadStrideInfo dwc_pad_stride_info, - const QuantizationInfo &pwi, const QuantizationInfo &dwi, const QuantizationInfo &pji) + const QuantizationInfo &pwi, + const QuantizationInfo &dwi, + const QuantizationInfo &pji) { std::string total_path = param_path + "_"; @@ -416,25 +452,28 @@ private: get_weights_accessor(data_path, total_path + "project_weights.npy"), get_weights_accessor(data_path, total_path + "project_biases.npy"), PadStrideInfo(1, 1, 0, 0), 1, pwi) - .set_name(param_path + "/Conv2D") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)).set_name(param_path + "/Conv2D/Relu6") - << DepthwiseConvolutionLayer(3U, 3U, - get_weights_accessor(data_path, total_path + "depthwise_depthwise_weights.npy"), - get_weights_accessor(data_path, total_path + "depthwise_depthwise_biases.npy"), - dwc_pad_stride_info, 1, dwi) - .set_name(param_path + "/depthwise/depthwise") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)).set_name(param_path + "/depthwise/Relu6") + .set_name(param_path + "/Conv2D") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)) + .set_name(param_path + "/Conv2D/Relu6") + << DepthwiseConvolutionLayer( + 3U, 3U, get_weights_accessor(data_path, total_path + "depthwise_depthwise_weights.npy"), + get_weights_accessor(data_path, total_path + "depthwise_depthwise_biases.npy"), dwc_pad_stride_info, + 1, dwi) + .set_name(param_path + "/depthwise/depthwise") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, 6.f)) + .set_name(param_path + "/depthwise/Relu6") << ConvolutionLayer(1U, 1U, output_channels, get_weights_accessor(data_path, total_path + "project_weights.npy"), get_weights_accessor(data_path, total_path + "project_biases.npy"), PadStrideInfo(1, 1, 0, 0), 1, pji) - .set_name(param_path + "/project/Conv2D"); + .set_name(param_path + "/project/Conv2D"); - if(is_residual == IsResidual::Yes) + if 
(is_residual == IsResidual::Yes) { // Add residual node SubStream right(graph); - graph << EltwiseLayer(std::move(left), std::move(right), EltwiseOperation::Add).set_name(param_path + "/add"); + graph + << EltwiseLayer(std::move(left), std::move(right), EltwiseOperation::Add).set_name(param_path + "/add"); } else { diff --git a/examples/graph_resnet12.cpp b/examples/graph_resnet12.cpp index 120cc9b755..80db826be5 100644 --- a/examples/graph_resnet12.cpp +++ b/examples/graph_resnet12.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. */ #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -36,7 +37,12 @@ class GraphResNet12Example : public Example { public: GraphResNet12Example() - : cmd_parser(), common_opts(cmd_parser), model_input_width(nullptr), model_input_height(nullptr), common_params(), graph(0, "ResNet12") + : cmd_parser(), + common_opts(cmd_parser), + model_input_width(nullptr), + model_input_height(nullptr), + common_params(), + graph(0, "ResNet12") { model_input_width = cmd_parser.add_option<SimpleOption<unsigned int>>("image-width", 192); model_input_height = cmd_parser.add_option<SimpleOption<unsigned int>>("image-height", 128); @@ -45,7 +51,7 @@ public: model_input_width->set_help("Input image width."); model_input_height->set_help("Input image height."); } - GraphResNet12Example(const GraphResNet12Example &) = delete; + GraphResNet12Example(const GraphResNet12Example &) = delete; GraphResNet12Example &operator=(const GraphResNet12Example &) = delete; ~GraphResNet12Example() override = default; bool do_setup(int argc, char **argv) override @@ -58,7 +64,7 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; @@ -69,7 +75,8 @@ public: const unsigned int image_height = model_input_height->value(); // Checks - ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "QASYMM8 not supported for this graph"); + ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), + "QASYMM8 not supported for this graph"); // Print parameter values std::cout << common_params << std::endl; @@ -81,54 +88,51 @@ public: const std::string model_path = "/cnn_data/resnet12_model/"; // Create a preprocessor object - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<TFPreproccessor>(); + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<TFPreproccessor>(); // Create input descriptor - const TensorShape tensor_shape = permute_shape(TensorShape(image_width, image_height, 3U, 1U), DataLayout::NCHW, common_params.data_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(image_width, image_height, 3U, common_params.batches), DataLayout::NCHW, + common_params.data_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NCHW; - graph << common_params.target - << common_params.fast_math_hint - << 
InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), false /* Do not convert to BGR */)) - << ConvolutionLayer( - 9U, 9U, 64U, - get_weights_accessor(data_path, "conv1_weights.npy", weights_layout), - get_weights_accessor(data_path, "conv1_biases.npy", weights_layout), - PadStrideInfo(1, 1, 4, 4)) - .set_name("conv1/convolution") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1/Relu"); + graph << common_params.target << common_params.fast_math_hint + << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), + false /* Do not convert to BGR */)) + << ConvolutionLayer(9U, 9U, 64U, get_weights_accessor(data_path, "conv1_weights.npy", weights_layout), + get_weights_accessor(data_path, "conv1_biases.npy", weights_layout), + PadStrideInfo(1, 1, 4, 4)) + .set_name("conv1/convolution") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv1/Relu"); add_residual_block(data_path, "block1", weights_layout); add_residual_block(data_path, "block2", weights_layout); add_residual_block(data_path, "block3", weights_layout); add_residual_block(data_path, "block4", weights_layout); - graph << ConvolutionLayer( - 3U, 3U, 64U, - get_weights_accessor(data_path, "conv10_weights.npy", weights_layout), - get_weights_accessor(data_path, "conv10_biases.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv10/convolution") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv10/Relu") - << ConvolutionLayer( - 3U, 3U, 64U, - get_weights_accessor(data_path, "conv11_weights.npy", weights_layout), - get_weights_accessor(data_path, "conv11_biases.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv11/convolution") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv11/Relu") - << ConvolutionLayer( - 9U, 9U, 3U, - get_weights_accessor(data_path, "conv12_weights.npy", weights_layout), - get_weights_accessor(data_path, "conv12_biases.npy"), - PadStrideInfo(1, 1, 4, 4)) - .set_name("conv12/convolution") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH)).set_name("conv12/Tanh") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 0.58f, 0.5f)).set_name("conv12/Linear") - << OutputLayer(arm_compute::support::cpp14::make_unique<DummyAccessor>(0)); + graph << ConvolutionLayer(3U, 3U, 64U, get_weights_accessor(data_path, "conv10_weights.npy", weights_layout), + get_weights_accessor(data_path, "conv10_biases.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv10/convolution") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv10/Relu") + << ConvolutionLayer(3U, 3U, 64U, get_weights_accessor(data_path, "conv11_weights.npy", weights_layout), + get_weights_accessor(data_path, "conv11_biases.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv11/convolution") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv11/Relu") + << ConvolutionLayer(9U, 9U, 3U, get_weights_accessor(data_path, "conv12_weights.npy", weights_layout), + get_weights_accessor(data_path, "conv12_biases.npy"), PadStrideInfo(1, 1, 4, 4)) + .set_name("conv12/convolution") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::TANH)) + .set_name("conv12/Tanh") + << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 0.58f, 0.5f)) + .set_name("conv12/Linear") + << OutputLayer(std::make_unique<DummyAccessor>(0)); // Finalize graph GraphConfig config; @@ -136,6 +140,7 @@ public: config.use_tuner = common_params.enable_tuner; config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; graph.finalize(common_params.target, config); @@ -151,8 +156,8 @@ public: private: CommandLineParser cmd_parser; CommonGraphOptions common_opts; - SimpleOption<unsigned int> *model_input_width{ nullptr }; - SimpleOption<unsigned int> *model_input_height{ nullptr }; + SimpleOption<unsigned int> *model_input_width{nullptr}; + SimpleOption<unsigned int> *model_input_height{nullptr}; CommonGraphParams common_params; Stream graph; @@ -169,35 +174,33 @@ private: SubStream left(graph); SubStream right(graph); - right << ConvolutionLayer( - 3U, 3U, 64U, - get_weights_accessor(data_path, unit_path + "conv1_weights.npy", weights_layout), - get_weights_accessor(data_path, unit_path + "conv1_biases.npy", weights_layout), - PadStrideInfo(1, 1, 1, 1)) - .set_name(unit_name + "conv1/convolution") + right << ConvolutionLayer(3U, 3U, 64U, + get_weights_accessor(data_path, unit_path + "conv1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "conv1_biases.npy", weights_layout), + PadStrideInfo(1, 1, 1, 1)) + .set_name(unit_name + "conv1/convolution") << BatchNormalizationLayer( - get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_moving_variance.npy"), - get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_gamma.npy"), - get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_beta.npy"), - 0.0000100099996416f) - .set_name(unit_name + "conv1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "conv1/Relu") - - << ConvolutionLayer( - 3U, 3U, 64U, - get_weights_accessor(data_path, unit_path + "conv2_weights.npy", weights_layout), - get_weights_accessor(data_path, unit_path + "conv2_biases.npy", weights_layout), - PadStrideInfo(1, 1, 1, 1)) - .set_name(unit_name + "conv2/convolution") + get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_gamma.npy"), + get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_beta.npy"), 0.0000100099996416f) + .set_name(unit_name + "conv1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "conv1/Relu") + + << ConvolutionLayer(3U, 3U, 64U, + get_weights_accessor(data_path, unit_path + "conv2_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "conv2_biases.npy", weights_layout), + PadStrideInfo(1, 1, 1, 1)) + .set_name(unit_name + "conv2/convolution") << BatchNormalizationLayer( - get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_moving_variance.npy"), - get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_gamma.npy"), - get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_beta.npy"), - 0.0000100099996416f) - .set_name(unit_name + "conv2/BatchNorm") - << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "conv2/Relu"); + get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_gamma.npy"), + get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_beta.npy"), 0.0000100099996416f) + .set_name(unit_name + "conv2/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "conv2/Relu"); graph << EltwiseLayer(std::move(left), std::move(right), EltwiseOperation::Add).set_name(unit_name + "add"); } diff --git a/examples/graph_resnet50.cpp b/examples/graph_resnet50.cpp index 7af058e042..ba0f0d5fb6 100644 --- a/examples/graph_resnet50.cpp +++ b/examples/graph_resnet50.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. */ #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -35,8 +36,7 @@ using namespace arm_compute::graph_utils; class GraphResNetV1_50Example : public Example { public: - GraphResNetV1_50Example() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "ResNetV1_50") + GraphResNetV1_50Example() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "ResNetV1_50") { } bool do_setup(int argc, char **argv) override @@ -49,7 +49,7 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; @@ -62,36 +62,40 @@ public: std::string data_path = common_params.data_path; // Create a preprocessor object - const std::array<float, 3> mean_rgb{ { 122.68f, 116.67f, 104.01f } }; - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb, - false /* Do not convert to BGR */); + const std::array<float, 3> mean_rgb{{122.68f, 116.67f, 104.01f}}; + std::unique_ptr<IPreprocessor> preprocessor = + std::make_unique<CaffePreproccessor>(mean_rgb, false /* Do not convert to BGR */); // Create input descriptor const auto operation_layout = common_params.data_layout; - const TensorShape tensor_shape = permute_shape(TensorShape(224U, 224U, 3U, 1U), DataLayout::NCHW, operation_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(224U, 224U, 3U, common_params.batches), DataLayout::NCHW, operation_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NCHW; - graph << common_params.target - << common_params.fast_math_hint - << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), false /* Do not convert to BGR */)) + graph << common_params.target << common_params.fast_math_hint + << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), + false /* Do not convert to BGR */)) << ConvolutionLayer( - 7U, 7U, 64U, - get_weights_accessor(data_path, 
"/cnn_data/resnet50_model/conv1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 3, 3)) - .set_name("conv1/convolution") + 7U, 7U, 64U, + get_weights_accessor(data_path, "/cnn_data/resnet50_model/conv1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 3, 3)) + .set_name("conv1/convolution") << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/resnet50_model/conv1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/resnet50_model/conv1_BatchNorm_moving_variance.npy"), - get_weights_accessor(data_path, "/cnn_data/resnet50_model/conv1_BatchNorm_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/resnet50_model/conv1_BatchNorm_beta.npy"), - 0.0000100099996416f) - .set_name("conv1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1/Relu") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR))).set_name("pool1/MaxPool"); + get_weights_accessor(data_path, "/cnn_data/resnet50_model/conv1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/resnet50_model/conv1_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, "/cnn_data/resnet50_model/conv1_BatchNorm_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/resnet50_model/conv1_BatchNorm_beta.npy"), + 0.0000100099996416f) + .set_name("conv1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv1/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, + PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR))) + .set_name("pool1/MaxPool"); add_residual_block(data_path, "block1", weights_layout, 64, 3, 2); add_residual_block(data_path, "block2", weights_layout, 128, 4, 2); @@ -100,22 +104,23 @@ public: graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, operation_layout)).set_name("pool5") << ConvolutionLayer( - 1U, 1U, 1000U, - get_weights_accessor(data_path, "/cnn_data/resnet50_model/logits_weights.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/resnet50_model/logits_biases.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("logits/convolution") - << FlattenLayer().set_name("predictions/Reshape") - << SoftmaxLayer().set_name("predictions/Softmax") + 1U, 1U, 1000U, + get_weights_accessor(data_path, "/cnn_data/resnet50_model/logits_weights.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/resnet50_model/logits_biases.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("logits/convolution") + << FlattenLayer().set_name("predictions/Reshape") << SoftmaxLayer().set_name("predictions/Softmax") << OutputLayer(get_output_accessor(common_params, 5)); // Finalize graph GraphConfig config; - config.num_threads = common_params.threads; - config.use_tuner = common_params.enable_tuner; - config.tuner_mode = common_params.tuner_mode; - config.tuner_file = common_params.tuner_file; - config.convert_to_uint8 = (common_params.data_type == DataType::QASYMM8); + config.num_threads = common_params.threads; + config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; + config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; + config.use_synthetic_type = 
arm_compute::is_data_type_quantized(common_params.data_type); + config.synthetic_type = common_params.data_type; graph.finalize(common_params.target, config); @@ -134,10 +139,14 @@ private: CommonGraphParams common_params; Stream graph; - void add_residual_block(const std::string &data_path, const std::string &name, DataLayout weights_layout, - unsigned int base_depth, unsigned int num_units, unsigned int stride) + void add_residual_block(const std::string &data_path, + const std::string &name, + DataLayout weights_layout, + unsigned int base_depth, + unsigned int num_units, + unsigned int stride) { - for(unsigned int i = 0; i < num_units; ++i) + for (unsigned int i = 0; i < num_units; ++i) { std::stringstream unit_path_ss; unit_path_ss << "/cnn_data/resnet50_model/" << name << "_unit_" << (i + 1) << "_bottleneck_v1_"; @@ -149,89 +158,90 @@ private: unsigned int middle_stride = 1; - if(i == (num_units - 1)) + if (i == (num_units - 1)) { middle_stride = stride; } SubStream right(graph); - right << ConvolutionLayer( - 1U, 1U, base_depth, - get_weights_accessor(data_path, unit_path + "conv1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "conv1/convolution") + right << ConvolutionLayer(1U, 1U, base_depth, + get_weights_accessor(data_path, unit_path + "conv1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "conv1/convolution") << BatchNormalizationLayer( - get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_moving_variance.npy"), - get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_gamma.npy"), - get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_beta.npy"), - 0.0000100099996416f) - .set_name(unit_name + "conv1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "conv1/Relu") + get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_gamma.npy"), + get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_beta.npy"), 0.0000100099996416f) + .set_name(unit_name + "conv1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "conv1/Relu") - << ConvolutionLayer( - 3U, 3U, base_depth, - get_weights_accessor(data_path, unit_path + "conv2_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(middle_stride, middle_stride, 1, 1)) - .set_name(unit_name + "conv2/convolution") + << ConvolutionLayer(3U, 3U, base_depth, + get_weights_accessor(data_path, unit_path + "conv2_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(middle_stride, middle_stride, 1, 1)) + .set_name(unit_name + "conv2/convolution") << BatchNormalizationLayer( - get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_moving_variance.npy"), - get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_gamma.npy"), - get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_beta.npy"), - 0.0000100099996416f) - 
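The GraphConfig block earlier in this hunk also changes behaviour, not just formatting: the old convert_to_uint8 flag is replaced by the use_synthetic_type/synthetic_type pair, and a new mlgo_file option is threaded through from the common parameters. The finalization sequence, as it now appears in each example:

    GraphConfig config;
    config.num_threads        = common_params.threads;
    config.use_tuner          = common_params.enable_tuner;
    config.tuner_mode         = common_params.tuner_mode;
    config.tuner_file         = common_params.tuner_file;
    config.mlgo_file          = common_params.mlgo_file; // new: MLGO heuristics file
    // Synthetic-type mode replaces the old convert_to_uint8 flag and covers
    // any quantized data type, not only QASYMM8 as before.
    config.use_synthetic_type = arm_compute::is_data_type_quantized(common_params.data_type);
    config.synthetic_type     = common_params.data_type;
    graph.finalize(common_params.target, config);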
.set_name(unit_name + "conv2/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "conv1/Relu") + get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_gamma.npy"), + get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_beta.npy"), 0.0000100099996416f) + .set_name(unit_name + "conv2/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "conv1/Relu") - << ConvolutionLayer( - 1U, 1U, base_depth * 4, - get_weights_accessor(data_path, unit_path + "conv3_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "conv3/convolution") + << ConvolutionLayer(1U, 1U, base_depth * 4, + get_weights_accessor(data_path, unit_path + "conv3_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "conv3/convolution") << BatchNormalizationLayer( - get_weights_accessor(data_path, unit_path + "conv3_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "conv3_BatchNorm_moving_variance.npy"), - get_weights_accessor(data_path, unit_path + "conv3_BatchNorm_gamma.npy"), - get_weights_accessor(data_path, unit_path + "conv3_BatchNorm_beta.npy"), - 0.0000100099996416f) - .set_name(unit_name + "conv2/BatchNorm"); + get_weights_accessor(data_path, unit_path + "conv3_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, unit_path + "conv3_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, unit_path + "conv3_BatchNorm_gamma.npy"), + get_weights_accessor(data_path, unit_path + "conv3_BatchNorm_beta.npy"), 0.0000100099996416f) + .set_name(unit_name + "conv2/BatchNorm"); - if(i == 0) + if (i == 0) { SubStream left(graph); left << ConvolutionLayer( - 1U, 1U, base_depth * 4, - get_weights_accessor(data_path, unit_path + "shortcut_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "shortcut/convolution") + 1U, 1U, base_depth * 4, + get_weights_accessor(data_path, unit_path + "shortcut_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "shortcut/convolution") << BatchNormalizationLayer( - get_weights_accessor(data_path, unit_path + "shortcut_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "shortcut_BatchNorm_moving_variance.npy"), - get_weights_accessor(data_path, unit_path + "shortcut_BatchNorm_gamma.npy"), - get_weights_accessor(data_path, unit_path + "shortcut_BatchNorm_beta.npy"), - 0.0000100099996416f) - .set_name(unit_name + "shortcut/BatchNorm"); + get_weights_accessor(data_path, unit_path + "shortcut_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, unit_path + "shortcut_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, unit_path + "shortcut_BatchNorm_gamma.npy"), + get_weights_accessor(data_path, unit_path + "shortcut_BatchNorm_beta.npy"), + 0.0000100099996416f) + .set_name(unit_name + "shortcut/BatchNorm"); - graph << EltwiseLayer(std::move(left), std::move(right), EltwiseOperation::Add).set_name(unit_name + "add"); + graph << 
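For reference, each ResNet50 bottleneck unit pairs the right sub-stream above with one of three shortcut variants, depending on the unit's position in the block. (Note also that the activation after conv2 carries the label "conv1/Relu" on both sides of this diff; that copy-paste label predates this reformatting commit.) Condensed, with the layer arguments as in the hunk:

    SubStream left(graph);
    if (i == 0)
    {
        // First unit of a block: 1x1 projection to the new channel depth
        left << ConvolutionLayer(1U, 1U, base_depth * 4,
                                 get_weights_accessor(data_path, unit_path + "shortcut_weights.npy", weights_layout),
                                 std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                                 PadStrideInfo(1, 1, 0, 0));
        // (followed by the shortcut BatchNormalizationLayer, as above)
    }
    else if (middle_stride > 1)
    {
        // Strided unit: downsample the identity with a 1x1 max pool
        left << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 1, common_params.data_layout,
                                              PadStrideInfo(middle_stride, middle_stride, 0, 0), true));
    }
    // Otherwise the identity passes through unchanged; in all three cases
    // the branches are merged by element-wise addition:
    graph << EltwiseLayer(std::move(left), std::move(right), EltwiseOperation::Add);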
EltwiseLayer(std::move(left), std::move(right), EltwiseOperation::Add) + .set_name(unit_name + "add"); } - else if(middle_stride > 1) + else if (middle_stride > 1) { SubStream left(graph); - left << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 1, common_params.data_layout, PadStrideInfo(middle_stride, middle_stride, 0, 0), true)).set_name(unit_name + "shortcut/MaxPool"); + left << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 1, common_params.data_layout, + PadStrideInfo(middle_stride, middle_stride, 0, 0), true)) + .set_name(unit_name + "shortcut/MaxPool"); - graph << EltwiseLayer(std::move(left), std::move(right), EltwiseOperation::Add).set_name(unit_name + "add"); + graph << EltwiseLayer(std::move(left), std::move(right), EltwiseOperation::Add) + .set_name(unit_name + "add"); } else { SubStream left(graph); - graph << EltwiseLayer(std::move(left), std::move(right), EltwiseOperation::Add).set_name(unit_name + "add"); + graph << EltwiseLayer(std::move(left), std::move(right), EltwiseOperation::Add) + .set_name(unit_name + "add"); } - graph << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Relu"); + graph << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Relu"); } } }; diff --git a/examples/graph_resnet_v2_50.cpp b/examples/graph_resnet_v2_50.cpp index 7d6b9aa3fd..48cf9b0b3c 100644 --- a/examples/graph_resnet_v2_50.cpp +++ b/examples/graph_resnet_v2_50.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. */ #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -35,8 +36,7 @@ using namespace arm_compute::graph_utils; class GraphResNetV2_50Example : public Example { public: - GraphResNetV2_50Example() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "ResNetV2_50") + GraphResNetV2_50Example() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "ResNetV2_50") { } bool do_setup(int argc, char **argv) override @@ -49,7 +49,7 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; @@ -61,64 +61,63 @@ public: // Get trainable parameters data path std::string data_path = common_params.data_path; std::string model_path = "/cnn_data/resnet_v2_50_model/"; - if(!data_path.empty()) + if (!data_path.empty()) { data_path += model_path; } // Create a preprocessor object - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<TFPreproccessor>(); + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<TFPreproccessor>(); // Create input descriptor const auto operation_layout = common_params.data_layout; - const TensorShape tensor_shape = permute_shape(TensorShape(224U, 224U, 3U, 1U), DataLayout::NCHW, operation_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(224U, 224U, 3U, common_params.batches), DataLayout::NCHW, operation_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); // Set weights trained layout const DataLayout 
weights_layout = DataLayout::NCHW; - graph << common_params.target - << common_params.fast_math_hint - << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), false /* Do not convert to BGR */)) - << ConvolutionLayer( - 7U, 7U, 64U, - get_weights_accessor(data_path, "conv1_weights.npy", weights_layout), - get_weights_accessor(data_path, "conv1_biases.npy", weights_layout), - PadStrideInfo(2, 2, 3, 3)) - .set_name("conv1/convolution") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR))).set_name("pool1/MaxPool"); + graph << common_params.target << common_params.fast_math_hint + << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), + false /* Do not convert to BGR */)) + << ConvolutionLayer(7U, 7U, 64U, get_weights_accessor(data_path, "conv1_weights.npy", weights_layout), + get_weights_accessor(data_path, "conv1_biases.npy", weights_layout), + PadStrideInfo(2, 2, 3, 3)) + .set_name("conv1/convolution") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, + PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR))) + .set_name("pool1/MaxPool"); add_residual_block(data_path, "block1", weights_layout, 64, 3, 2); add_residual_block(data_path, "block2", weights_layout, 128, 4, 2); add_residual_block(data_path, "block3", weights_layout, 256, 6, 2); add_residual_block(data_path, "block4", weights_layout, 512, 3, 1); - graph << BatchNormalizationLayer( - get_weights_accessor(data_path, "postnorm_moving_mean.npy"), - get_weights_accessor(data_path, "postnorm_moving_variance.npy"), - get_weights_accessor(data_path, "postnorm_gamma.npy"), - get_weights_accessor(data_path, "postnorm_beta.npy"), - 0.000009999999747378752f) - .set_name("postnorm/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("postnorm/Relu") + graph << BatchNormalizationLayer(get_weights_accessor(data_path, "postnorm_moving_mean.npy"), + get_weights_accessor(data_path, "postnorm_moving_variance.npy"), + get_weights_accessor(data_path, "postnorm_gamma.npy"), + get_weights_accessor(data_path, "postnorm_beta.npy"), 0.000009999999747378752f) + .set_name("postnorm/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("postnorm/Relu") << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, operation_layout)).set_name("pool5") - << ConvolutionLayer( - 1U, 1U, 1001U, - get_weights_accessor(data_path, "logits_weights.npy", weights_layout), - get_weights_accessor(data_path, "logits_biases.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("logits/convolution") - << FlattenLayer().set_name("predictions/Reshape") - << SoftmaxLayer().set_name("predictions/Softmax") + << ConvolutionLayer(1U, 1U, 1001U, get_weights_accessor(data_path, "logits_weights.npy", weights_layout), + get_weights_accessor(data_path, "logits_biases.npy"), PadStrideInfo(1, 1, 0, 0)) + .set_name("logits/convolution") + << FlattenLayer().set_name("predictions/Reshape") << SoftmaxLayer().set_name("predictions/Softmax") << OutputLayer(get_output_accessor(common_params, 5)); // Finalize graph GraphConfig config; - config.num_threads = common_params.threads; - config.use_tuner = common_params.enable_tuner; - config.tuner_mode = common_params.tuner_mode; - config.tuner_file = common_params.tuner_file; - config.convert_to_uint8 = (common_params.data_type == DataType::QASYMM8); + config.num_threads 
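Two functional differences from the v1 example are visible in this hunk: ResNet v2 finishes with a "postnorm" BatchNorm + ReLU before pooling, and its classifier has 1001 outputs rather than 1000 (reading the extra output as the TF-Slim checkpoint's background class is an assumption, not stated in this commit). The tail of the network therefore reduces to:

    graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, operation_layout)) // global average pool
          << ConvolutionLayer(1U, 1U, 1001U,                                    // 1000 classes + 1 extra output
                              get_weights_accessor(data_path, "logits_weights.npy", weights_layout),
                              get_weights_accessor(data_path, "logits_biases.npy"),
                              PadStrideInfo(1, 1, 0, 0))
          << FlattenLayer() << SoftmaxLayer()
          << OutputLayer(get_output_accessor(common_params, 5)); // report top-5 predictions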
= common_params.threads; + config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; + config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; + config.use_synthetic_type = arm_compute::is_data_type_quantized(common_params.data_type); + config.synthetic_type = common_params.data_type; graph.finalize(common_params.target, config); @@ -137,10 +136,14 @@ private: CommonGraphParams common_params; Stream graph; - void add_residual_block(const std::string &data_path, const std::string &name, DataLayout weights_layout, - unsigned int base_depth, unsigned int num_units, unsigned int stride) + void add_residual_block(const std::string &data_path, + const std::string &name, + DataLayout weights_layout, + unsigned int base_depth, + unsigned int num_units, + unsigned int stride) { - for(unsigned int i = 0; i < num_units; ++i) + for (unsigned int i = 0; i < num_units; ++i) { // Generate unit names std::stringstream unit_path_ss; @@ -152,7 +155,8 @@ private: std::string unit_name = unit_name_ss.str(); const TensorShape last_shape = graph.graph().node(graph.tail_node())->output(0)->desc().shape; - unsigned int depth_in = last_shape[arm_compute::get_data_layout_dimension_index(common_params.data_layout, DataLayoutDimension::CHANNEL)]; + unsigned int depth_in = last_shape[arm_compute::get_data_layout_dimension_index( + common_params.data_layout, DataLayoutDimension::CHANNEL)]; unsigned int depth_out = base_depth * 4; // All units have stride 1 apart from last one @@ -160,73 +164,76 @@ private: // Preact SubStream preact(graph); - preact << BatchNormalizationLayer( - get_weights_accessor(data_path, unit_path + "preact_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "preact_moving_variance.npy"), - get_weights_accessor(data_path, unit_path + "preact_gamma.npy"), - get_weights_accessor(data_path, unit_path + "preact_beta.npy"), - 0.000009999999747378752f) - .set_name(unit_name + "preact/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "preact/Relu"); + preact << BatchNormalizationLayer(get_weights_accessor(data_path, unit_path + "preact_moving_mean.npy"), + get_weights_accessor(data_path, unit_path + "preact_moving_variance.npy"), + get_weights_accessor(data_path, unit_path + "preact_gamma.npy"), + get_weights_accessor(data_path, unit_path + "preact_beta.npy"), + 0.000009999999747378752f) + .set_name(unit_name + "preact/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "preact/Relu"); // Create bottleneck path SubStream shortcut(graph); - if(depth_in == depth_out) + if (depth_in == depth_out) { - if(middle_stride != 1) + if (middle_stride != 1) { - shortcut << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 1, common_params.data_layout, PadStrideInfo(middle_stride, middle_stride, 0, 0), true)).set_name(unit_name + "shortcut/MaxPool"); + shortcut << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 1, common_params.data_layout, + PadStrideInfo(middle_stride, middle_stride, 0, 0), true)) + .set_name(unit_name + "shortcut/MaxPool"); } } else { shortcut.forward_tail(preact.tail_node()); shortcut << ConvolutionLayer( - 1U, 1U, depth_out, - get_weights_accessor(data_path, unit_path + "shortcut_weights.npy", weights_layout), - get_weights_accessor(data_path, unit_path + "shortcut_biases.npy", weights_layout), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + 
"shortcut/convolution"); + 1U, 1U, depth_out, + get_weights_accessor(data_path, unit_path + "shortcut_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "shortcut_biases.npy", weights_layout), + PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "shortcut/convolution"); } // Create residual path SubStream residual(preact); - residual << ConvolutionLayer( - 1U, 1U, base_depth, - get_weights_accessor(data_path, unit_path + "conv1_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "conv1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_moving_variance.npy"), - get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_gamma.npy"), - get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_beta.npy"), - 0.000009999999747378752f) - .set_name(unit_name + "conv1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "conv1/Relu") - << ConvolutionLayer( - 3U, 3U, base_depth, - get_weights_accessor(data_path, unit_path + "conv2_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(middle_stride, middle_stride, 1, 1)) - .set_name(unit_name + "conv2/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_moving_mean.npy"), - get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_moving_variance.npy"), - get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_gamma.npy"), - get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_beta.npy"), - 0.000009999999747378752f) - .set_name(unit_name + "conv2/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "conv1/Relu") - << ConvolutionLayer( - 1U, 1U, depth_out, - get_weights_accessor(data_path, unit_path + "conv3_weights.npy", weights_layout), - get_weights_accessor(data_path, unit_path + "conv3_biases.npy", weights_layout), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "conv3/convolution"); - - graph << EltwiseLayer(std::move(shortcut), std::move(residual), EltwiseOperation::Add).set_name(unit_name + "add"); + residual + << ConvolutionLayer(1U, 1U, base_depth, + get_weights_accessor(data_path, unit_path + "conv1_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "conv1/convolution") + << BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_gamma.npy"), + get_weights_accessor(data_path, unit_path + "conv1_BatchNorm_beta.npy"), + 0.000009999999747378752f) + .set_name(unit_name + "conv1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "conv1/Relu") + << ConvolutionLayer(3U, 3U, base_depth, + get_weights_accessor(data_path, unit_path + "conv2_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(middle_stride, middle_stride, 1, 1)) + .set_name(unit_name + "conv2/convolution") + << 
BatchNormalizationLayer( + get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_moving_mean.npy"), + get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_moving_variance.npy"), + get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_gamma.npy"), + get_weights_accessor(data_path, unit_path + "conv2_BatchNorm_beta.npy"), + 0.000009999999747378752f) + .set_name(unit_name + "conv2/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "conv1/Relu") + << ConvolutionLayer(1U, 1U, depth_out, + get_weights_accessor(data_path, unit_path + "conv3_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "conv3_biases.npy", weights_layout), + PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "conv3/convolution"); + + graph << EltwiseLayer(std::move(shortcut), std::move(residual), EltwiseOperation::Add) + .set_name(unit_name + "add"); } } }; diff --git a/examples/graph_resnext50.cpp b/examples/graph_resnext50.cpp index 2c50594b0c..12a1507c4c 100644 --- a/examples/graph_resnext50.cpp +++ b/examples/graph_resnext50.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. */ #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -35,8 +36,7 @@ using namespace arm_compute::graph_utils; class GraphResNeXt50Example : public Example { public: - GraphResNeXt50Example() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "ResNeXt50") + GraphResNeXt50Example() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "ResNeXt50") { } bool do_setup(int argc, char **argv) override @@ -49,14 +49,15 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; } // Checks - ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "QASYMM8 not supported for this graph"); + ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), + "QASYMM8 not supported for this graph"); // Print parameter values std::cout << common_params << std::endl; @@ -66,28 +67,33 @@ public: // Create input descriptor const auto operation_layout = common_params.data_layout; - const TensorShape tensor_shape = permute_shape(TensorShape(224U, 224U, 3U, 1U), DataLayout::NCHW, operation_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(224U, 224U, 3U, common_params.batches), DataLayout::NCHW, operation_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NCHW; - graph << common_params.target - << common_params.fast_math_hint + graph << common_params.target << common_params.fast_math_hint << InputLayer(input_descriptor, get_input_accessor(common_params)) << ScaleLayer(get_weights_accessor(data_path, "/cnn_data/resnext50_model/bn_data_mul.npy"), get_weights_accessor(data_path, "/cnn_data/resnext50_model/bn_data_add.npy")) - .set_name("bn_data/Scale") + .set_name("bn_data/Scale") << 
ConvolutionLayer( - 7U, 7U, 64U, - get_weights_accessor(data_path, "/cnn_data/resnext50_model/conv0_weights.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/resnext50_model/conv0_biases.npy"), - PadStrideInfo(2, 2, 2, 3, 2, 3, DimensionRoundingType::FLOOR)) - .set_name("conv0/Convolution") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv0/Relu") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR))).set_name("pool0"); - - add_residual_block(data_path, weights_layout, /*ofm*/ 256, /*stage*/ 1, /*num_unit*/ 3, /*stride_conv_unit1*/ 1); + 7U, 7U, 64U, + get_weights_accessor(data_path, "/cnn_data/resnext50_model/conv0_weights.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/resnext50_model/conv0_biases.npy"), + PadStrideInfo(2, 2, 2, 3, 2, 3, DimensionRoundingType::FLOOR)) + .set_name("conv0/Convolution") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv0/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, + PadStrideInfo(2, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR))) + .set_name("pool0"); + + add_residual_block(data_path, weights_layout, /*ofm*/ 256, /*stage*/ 1, /*num_unit*/ 3, + /*stride_conv_unit1*/ 1); add_residual_block(data_path, weights_layout, 512, 2, 4, 2); add_residual_block(data_path, weights_layout, 1024, 3, 6, 2); add_residual_block(data_path, weights_layout, 2048, 4, 3, 2); @@ -102,6 +108,7 @@ public: config.use_tuner = common_params.enable_tuner; config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; graph.finalize(common_params.target, config); @@ -120,10 +127,14 @@ private: CommonGraphParams common_params; Stream graph; - void add_residual_block(const std::string &data_path, DataLayout weights_layout, - unsigned int base_depth, unsigned int stage, unsigned int num_units, unsigned int stride_conv_unit1) + void add_residual_block(const std::string &data_path, + DataLayout weights_layout, + unsigned int base_depth, + unsigned int stage, + unsigned int num_units, + unsigned int stride_conv_unit1) { - for(unsigned int i = 0; i < num_units; ++i) + for (unsigned int i = 0; i < num_units; ++i) { std::stringstream unit_path_ss; unit_path_ss << "/cnn_data/resnext50_model/stage" << stage << "_unit" << (i + 1) << "_"; @@ -134,54 +145,55 @@ private: std::string unit_name = unit_name_ss.str(); PadStrideInfo pad_grouped_conv(1, 1, 1, 1); - if(i == 0) + if (i == 0) { - pad_grouped_conv = (stage == 1) ? PadStrideInfo(stride_conv_unit1, stride_conv_unit1, 1, 1) : PadStrideInfo(stride_conv_unit1, stride_conv_unit1, 0, 1, 0, 1, DimensionRoundingType::FLOOR); + pad_grouped_conv = (stage == 1) ? 
PadStrideInfo(stride_conv_unit1, stride_conv_unit1, 1, 1) + : PadStrideInfo(stride_conv_unit1, stride_conv_unit1, 0, 1, 0, 1, + DimensionRoundingType::FLOOR); } SubStream right(graph); - right << ConvolutionLayer( - 1U, 1U, base_depth / 2, - get_weights_accessor(data_path, unit_path + "conv1_weights.npy", weights_layout), - get_weights_accessor(data_path, unit_path + "conv1_biases.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "conv1/convolution") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "conv1/Relu") - - << ConvolutionLayer( - 3U, 3U, base_depth / 2, - get_weights_accessor(data_path, unit_path + "conv2_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - pad_grouped_conv, 32) - .set_name(unit_name + "conv2/convolution") + right << ConvolutionLayer(1U, 1U, base_depth / 2, + get_weights_accessor(data_path, unit_path + "conv1_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "conv1_biases.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "conv1/convolution") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "conv1/Relu") + + << ConvolutionLayer(3U, 3U, base_depth / 2, + get_weights_accessor(data_path, unit_path + "conv2_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), pad_grouped_conv, + 32) + .set_name(unit_name + "conv2/convolution") << ScaleLayer(get_weights_accessor(data_path, unit_path + "bn2_mul.npy"), get_weights_accessor(data_path, unit_path + "bn2_add.npy")) - .set_name(unit_name + "conv1/Scale") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "conv2/Relu") + .set_name(unit_name + "conv1/Scale") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "conv2/Relu") - << ConvolutionLayer( - 1U, 1U, base_depth, - get_weights_accessor(data_path, unit_path + "conv3_weights.npy", weights_layout), - get_weights_accessor(data_path, unit_path + "conv3_biases.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name(unit_name + "conv3/convolution"); + << ConvolutionLayer(1U, 1U, base_depth, + get_weights_accessor(data_path, unit_path + "conv3_weights.npy", weights_layout), + get_weights_accessor(data_path, unit_path + "conv3_biases.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name(unit_name + "conv3/convolution"); SubStream left(graph); - if(i == 0) + if (i == 0) { - left << ConvolutionLayer( - 1U, 1U, base_depth, - get_weights_accessor(data_path, unit_path + "sc_weights.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(stride_conv_unit1, stride_conv_unit1, 0, 0)) - .set_name(unit_name + "sc/convolution") + left << ConvolutionLayer(1U, 1U, base_depth, + get_weights_accessor(data_path, unit_path + "sc_weights.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(stride_conv_unit1, stride_conv_unit1, 0, 0)) + .set_name(unit_name + "sc/convolution") << ScaleLayer(get_weights_accessor(data_path, unit_path + "sc_bn_mul.npy"), get_weights_accessor(data_path, unit_path + "sc_bn_add.npy")) - .set_name(unit_name + "sc/scale"); + .set_name(unit_name + "sc/scale"); } graph << EltwiseLayer(std::move(left), std::move(right), EltwiseOperation::Add).set_name(unit_name + "add"); - graph << 
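The distinguishing layer in this file is the cardinality-32 grouped convolution: the final positional argument of ConvolutionLayer sets num_groups, splitting the 3x3 convolution into 32 parallel paths as in the ResNeXt paper. From the hunk above, with a comment added:

    right << ConvolutionLayer(3U, 3U, base_depth / 2,
                              get_weights_accessor(data_path, unit_path + "conv2_weights.npy", weights_layout),
                              std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                              pad_grouped_conv,
                              32); // num_groups = 32: the "aggregated transformations" cardinality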
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "Relu"); + graph << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "Relu"); } } }; @@ -191,7 +203,7 @@ private: * Model is based on: * https://arxiv.org/abs/1611.05431 * "Aggregated Residual Transformations for Deep Neural Networks" - * Saining Xie, Ross Girshick, Piotr Dollar, Zhuowen Tu, Kaiming He + * Saining Xie, Ross Girshick, Piotr Dollar, Zhuowen Tu, Kaiming He. * * @note To list all the possible arguments execute the binary appended with the --help option * diff --git a/examples/graph_shufflenet.cpp b/examples/graph_shufflenet.cpp index 0b977982b5..513d95884e 100644 --- a/examples/graph_shufflenet.cpp +++ b/examples/graph_shufflenet.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. */ #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -35,8 +36,7 @@ using namespace arm_compute::graph_utils; class ShuffleNetExample : public Example { public: - ShuffleNetExample() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "ShuffleNet") + ShuffleNetExample() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "ShuffleNet") { } bool do_setup(int argc, char **argv) override @@ -49,20 +49,21 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; } // Set default layout if needed (Single kernel grouped convolution not yet supported int NHWC) - if(!common_opts.data_layout->is_set()) + if (!common_opts.data_layout->is_set()) { common_params.data_layout = DataLayout::NHWC; } // Checks - ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "QASYMM8 not supported for this graph"); + ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), + "QASYMM8 not supported for this graph"); // Print parameter values std::cout << common_params << std::endl; @@ -75,40 +76,40 @@ public: std::string data_path = common_params.data_path; // Add model path to data path - if(!data_path.empty()) + if (!data_path.empty()) { data_path += model_path; } // Create input descriptor const auto operation_layout = common_params.data_layout; - const TensorShape tensor_shape = permute_shape(TensorShape(224U, 224U, 3U, 1U), DataLayout::NCHW, operation_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(224U, 224U, 3U, common_params.batches), DataLayout::NCHW, operation_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NCHW; // Create preprocessor - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<TFPreproccessor>(0); + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<TFPreproccessor>(0); - graph << common_params.target - << common_params.fast_math_hint - << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), 
false /* Do not convert to BGR */)) - << ConvolutionLayer( - 3U, 3U, 24U, - get_weights_accessor(data_path, "conv3_0_w_0.npy", weights_layout), - get_weights_accessor(data_path, "conv3_0_b_0.npy", weights_layout), - PadStrideInfo(2, 2, 1, 1)) - .set_name("Conv1/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "conv3_0_bn_rm_0.npy"), - get_weights_accessor(data_path, "conv3_0_bn_riv_0.npy"), - get_weights_accessor(data_path, "conv3_0_bn_s_0.npy"), - get_weights_accessor(data_path, "conv3_0_bn_b_0.npy"), - 1e-5f) - .set_name("Conv1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Conv1/Relu") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 1, 1))).set_name("pool1/MaxPool"); + graph << common_params.target << common_params.fast_math_hint + << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), + false /* Do not convert to BGR */)) + << ConvolutionLayer(3U, 3U, 24U, get_weights_accessor(data_path, "conv3_0_w_0.npy", weights_layout), + get_weights_accessor(data_path, "conv3_0_b_0.npy", weights_layout), + PadStrideInfo(2, 2, 1, 1)) + .set_name("Conv1/convolution") + << BatchNormalizationLayer(get_weights_accessor(data_path, "conv3_0_bn_rm_0.npy"), + get_weights_accessor(data_path, "conv3_0_bn_riv_0.npy"), + get_weights_accessor(data_path, "conv3_0_bn_s_0.npy"), + get_weights_accessor(data_path, "conv3_0_bn_b_0.npy"), 1e-5f) + .set_name("Conv1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("Conv1/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 1, 1))) + .set_name("pool1/MaxPool"); // Stage 2 add_residual_block(data_path, DataLayout::NCHW, 0U /* unit */, 112U /* depth */, 2U /* stride */); @@ -134,13 +135,10 @@ public: graph << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, operation_layout)).set_name("predictions/AvgPool") << FlattenLayer().set_name("predictions/Reshape") - << FullyConnectedLayer( - 1000U, - get_weights_accessor(data_path, "pred_w_0.npy", weights_layout), - get_weights_accessor(data_path, "pred_b_0.npy")) - .set_name("predictions/FC") - << SoftmaxLayer().set_name("predictions/Softmax") - << OutputLayer(get_output_accessor(common_params, 5)); + << FullyConnectedLayer(1000U, get_weights_accessor(data_path, "pred_w_0.npy", weights_layout), + get_weights_accessor(data_path, "pred_b_0.npy")) + .set_name("predictions/FC") + << SoftmaxLayer().set_name("predictions/Softmax") << OutputLayer(get_output_accessor(common_params, 5)); // Finalize graph GraphConfig config; @@ -148,6 +146,7 @@ public: config.use_tuner = common_params.enable_tuner; config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; graph.finalize(common_params.target, config); @@ -166,8 +165,11 @@ private: CommonGraphParams common_params; Stream graph; - void add_residual_block(const std::string &data_path, DataLayout weights_layout, - unsigned int unit, unsigned int depth, unsigned int stride) + void add_residual_block(const std::string &data_path, + DataLayout weights_layout, + unsigned int unit, + unsigned int depth, + unsigned int stride) { PadStrideInfo dwc_info = PadStrideInfo(1, 1, 1, 1); const unsigned int gconv_id = unit * 2; @@ -180,63 +182,61 @@ private: SubStream left_ss(graph); SubStream right_ss(graph); - if(stride == 2) + if (stride == 2) { - 
right_ss << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, 3, common_params.data_layout, PadStrideInfo(2, 2, 1, 1))).set_name(unit_name + "/pool_1/AveragePool"); + right_ss << PoolingLayer( + PoolingLayerInfo(PoolingType::AVG, 3, common_params.data_layout, PadStrideInfo(2, 2, 1, 1))) + .set_name(unit_name + "/pool_1/AveragePool"); dwc_info = PadStrideInfo(2, 2, 1, 1); } - left_ss << ConvolutionLayer( - 1U, 1U, depth, - get_weights_accessor(data_path, "gconv1_" + gconv_id_name + "_w_0.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0), num_groups) - .set_name(unit_name + "/gconv1_" + gconv_id_name + "/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "gconv1_" + gconv_id_name + "_bn_rm_0.npy"), - get_weights_accessor(data_path, "gconv1_" + gconv_id_name + "_bn_riv_0.npy"), - get_weights_accessor(data_path, "gconv1_" + gconv_id_name + "_bn_s_0.npy"), - get_weights_accessor(data_path, "gconv1_" + gconv_id_name + "_bn_b_0.npy"), - 1e-5f) - .set_name(unit_name + "/gconv1_" + gconv_id_name + "/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "/gconv1_" + gconv_id_name + "/Relu") - << ChannelShuffleLayer(num_groups).set_name(unit_name + "/shuffle_0/ChannelShufle") - << DepthwiseConvolutionLayer( - 3U, 3U, - get_weights_accessor(data_path, "gconv3_" + unit_id_name + "_w_0.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - dwc_info) - .set_name(unit_name + "/gconv3_" + unit_id_name + "/depthwise") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "gconv3_" + unit_id_name + "_bn_rm_0.npy"), - get_weights_accessor(data_path, "gconv3_" + unit_id_name + "_bn_riv_0.npy"), - get_weights_accessor(data_path, "gconv3_" + unit_id_name + "_bn_s_0.npy"), - get_weights_accessor(data_path, "gconv3_" + unit_id_name + "_bn_b_0.npy"), - 1e-5f) - .set_name(unit_name + "/gconv3_" + unit_id_name + "/BatchNorm") - << ConvolutionLayer( - 1U, 1U, depth, - get_weights_accessor(data_path, "gconv1_" + gconv_id_1_name + "_w_0.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0), num_groups) - .set_name(unit_name + "/gconv1_" + gconv_id_1_name + "/convolution") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "gconv1_" + gconv_id_1_name + "_bn_rm_0.npy"), - get_weights_accessor(data_path, "gconv1_" + gconv_id_1_name + "_bn_riv_0.npy"), - get_weights_accessor(data_path, "gconv1_" + gconv_id_1_name + "_bn_s_0.npy"), - get_weights_accessor(data_path, "gconv1_" + gconv_id_1_name + "_bn_b_0.npy"), - 1e-5f) - .set_name(unit_name + "/gconv1_" + gconv_id_1_name + "/BatchNorm"); + left_ss + << ConvolutionLayer(1U, 1U, depth, + get_weights_accessor(data_path, "gconv1_" + gconv_id_name + "_w_0.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(1, 1, 0, 0), num_groups) + .set_name(unit_name + "/gconv1_" + gconv_id_name + "/convolution") + << BatchNormalizationLayer(get_weights_accessor(data_path, "gconv1_" + gconv_id_name + "_bn_rm_0.npy"), + get_weights_accessor(data_path, "gconv1_" + gconv_id_name + "_bn_riv_0.npy"), + get_weights_accessor(data_path, "gconv1_" + gconv_id_name + "_bn_s_0.npy"), + get_weights_accessor(data_path, "gconv1_" + gconv_id_name + "_bn_b_0.npy"), + 1e-5f) + .set_name(unit_name + "/gconv1_" + gconv_id_name + "/BatchNorm") + << 
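What makes a ShuffleNet unit work is the interleaving step between its two grouped 1x1 convolutions: grouped convolutions keep channels partitioned, so ChannelShuffleLayer(num_groups) permutes them to let information cross groups before the depthwise 3x3. Stripping the BatchNorm/ReLU stages from the left_ss chain here for clarity:

    left_ss << ConvolutionLayer(1U, 1U, depth,
                                get_weights_accessor(data_path, "gconv1_" + gconv_id_name + "_w_0.npy", weights_layout),
                                std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                                PadStrideInfo(1, 1, 0, 0), num_groups) // grouped 1x1: channels stay partitioned
            << ChannelShuffleLayer(num_groups)                         // interleave channels across groups
            << DepthwiseConvolutionLayer(3U, 3U,
                                get_weights_accessor(data_path, "gconv3_" + unit_id_name + "_w_0.npy", weights_layout),
                                std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr),
                                dwc_info);                             // strided when the unit downsamples

Strided units then concatenate this result with the average-pooled identity, growing the channel count, while stride-1 units use element-wise addition, as the code just below shows.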
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "/gconv1_" + gconv_id_name + "/Relu") + << ChannelShuffleLayer(num_groups).set_name(unit_name + "/shuffle_0/ChannelShufle") + << DepthwiseConvolutionLayer( + 3U, 3U, get_weights_accessor(data_path, "gconv3_" + unit_id_name + "_w_0.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), dwc_info) + .set_name(unit_name + "/gconv3_" + unit_id_name + "/depthwise") + << BatchNormalizationLayer(get_weights_accessor(data_path, "gconv3_" + unit_id_name + "_bn_rm_0.npy"), + get_weights_accessor(data_path, "gconv3_" + unit_id_name + "_bn_riv_0.npy"), + get_weights_accessor(data_path, "gconv3_" + unit_id_name + "_bn_s_0.npy"), + get_weights_accessor(data_path, "gconv3_" + unit_id_name + "_bn_b_0.npy"), 1e-5f) + .set_name(unit_name + "/gconv3_" + unit_id_name + "/BatchNorm") + << ConvolutionLayer( + 1U, 1U, depth, + get_weights_accessor(data_path, "gconv1_" + gconv_id_1_name + "_w_0.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0), num_groups) + .set_name(unit_name + "/gconv1_" + gconv_id_1_name + "/convolution") + << BatchNormalizationLayer(get_weights_accessor(data_path, "gconv1_" + gconv_id_1_name + "_bn_rm_0.npy"), + get_weights_accessor(data_path, "gconv1_" + gconv_id_1_name + "_bn_riv_0.npy"), + get_weights_accessor(data_path, "gconv1_" + gconv_id_1_name + "_bn_s_0.npy"), + get_weights_accessor(data_path, "gconv1_" + gconv_id_1_name + "_bn_b_0.npy"), + 1e-5f) + .set_name(unit_name + "/gconv1_" + gconv_id_1_name + "/BatchNorm"); - if(stride == 2) + if (stride == 2) { graph << ConcatLayer(std::move(left_ss), std::move(right_ss)).set_name(unit_name + "/Concat"); } else { - graph << EltwiseLayer(std::move(left_ss), std::move(right_ss), EltwiseOperation::Add).set_name(unit_name + "/Add"); + graph << EltwiseLayer(std::move(left_ss), std::move(right_ss), EltwiseOperation::Add) + .set_name(unit_name + "/Add"); } - graph << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(unit_name + "/Relu"); + graph << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(unit_name + "/Relu"); } }; diff --git a/examples/graph_squeezenet.cpp b/examples/graph_squeezenet.cpp index 35fceb4e98..7d0528f805 100644 --- a/examples/graph_squeezenet.cpp +++ b/examples/graph_squeezenet.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. 
*/ #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -35,8 +36,7 @@ using namespace arm_compute::graph_utils; class GraphSqueezenetExample : public Example { public: - GraphSqueezenetExample() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "SqueezeNetV1") + GraphSqueezenetExample() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "SqueezeNetV1") { } bool do_setup(int argc, char **argv) override @@ -49,7 +49,7 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; @@ -62,113 +62,139 @@ public: std::string data_path = common_params.data_path; // Create a preprocessor object - const std::array<float, 3> mean_rgb{ { 122.68f, 116.67f, 104.01f } }; - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb); + const std::array<float, 3> mean_rgb{{122.68f, 116.67f, 104.01f}}; + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<CaffePreproccessor>(mean_rgb); // Create input descriptor const auto operation_layout = common_params.data_layout; - const TensorShape tensor_shape = permute_shape(TensorShape(224U, 224U, 3U, 1U), DataLayout::NCHW, operation_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(224U, 224U, 3U, common_params.batches), DataLayout::NCHW, operation_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NCHW; - graph << common_params.target - << common_params.fast_math_hint + graph << common_params.target << common_params.fast_math_hint << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor))) << ConvolutionLayer( - 7U, 7U, 96U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv1_b.npy"), - PadStrideInfo(2, 2, 0, 0)) - .set_name("conv1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu_conv1") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("pool1") + 7U, 7U, 96U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv1_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv1_b.npy"), + PadStrideInfo(2, 2, 0, 0)) + .set_name("conv1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("relu_conv1") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + .set_name("pool1") << ConvolutionLayer( - 1U, 1U, 16U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire2_squeeze1x1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire2_squeeze1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("fire2/squeeze1x1") - << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("fire2/relu_squeeze1x1"); + 1U, 1U, 16U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire2_squeeze1x1_w.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire2_squeeze1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("fire2/squeeze1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fire2/relu_squeeze1x1"); graph << get_expand_fire_node(data_path, "fire2", weights_layout, 64U, 64U).set_name("fire2/concat"); graph << ConvolutionLayer( - 1U, 1U, 16U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire3_squeeze1x1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire3_squeeze1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("fire3/squeeze1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("fire3/relu_squeeze1x1"); + 1U, 1U, 16U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire3_squeeze1x1_w.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire3_squeeze1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("fire3/squeeze1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fire3/relu_squeeze1x1"); graph << get_expand_fire_node(data_path, "fire3", weights_layout, 64U, 64U).set_name("fire3/concat"); graph << ConvolutionLayer( - 1U, 1U, 32U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire4_squeeze1x1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire4_squeeze1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("fire4/squeeze1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("fire4/relu_squeeze1x1"); + 1U, 1U, 32U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire4_squeeze1x1_w.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire4_squeeze1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("fire4/squeeze1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fire4/relu_squeeze1x1"); graph << get_expand_fire_node(data_path, "fire4", weights_layout, 128U, 128U).set_name("fire4/concat"); - graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("pool4") + graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + .set_name("pool4") << ConvolutionLayer( - 1U, 1U, 32U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire5_squeeze1x1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire5_squeeze1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("fire5/squeeze1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("fire5/relu_squeeze1x1"); + 1U, 1U, 32U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire5_squeeze1x1_w.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire5_squeeze1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("fire5/squeeze1x1") + << 
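Each fire module repeats the same squeeze-then-expand cadence with growing widths. Taking fire2 as a worked example: the 96-channel stem is squeezed to 16 channels by a 1x1 convolution, then get_expand_fire_node widens it again to 64 + 64 = 128 concatenated channels:

    graph << ConvolutionLayer(1U, 1U, 16U, // squeeze: 96 -> 16 channels
                              get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire2_squeeze1x1_w.npy",
                                                   weights_layout),
                              get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire2_squeeze1x1_b.npy"),
                              PadStrideInfo(1, 1, 0, 0))
          << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU));
    graph << get_expand_fire_node(data_path, "fire2", weights_layout, 64U, 64U); // expand: 64 + 64 = 128 channels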
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fire5/relu_squeeze1x1"); graph << get_expand_fire_node(data_path, "fire5", weights_layout, 128U, 128U).set_name("fire5/concat"); graph << ConvolutionLayer( - 1U, 1U, 48U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire6_squeeze1x1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire6_squeeze1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("fire6/squeeze1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("fire6/relu_squeeze1x1"); + 1U, 1U, 48U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire6_squeeze1x1_w.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire6_squeeze1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("fire6/squeeze1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fire6/relu_squeeze1x1"); graph << get_expand_fire_node(data_path, "fire6", weights_layout, 192U, 192U).set_name("fire6/concat"); graph << ConvolutionLayer( - 1U, 1U, 48U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire7_squeeze1x1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire7_squeeze1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("fire7/squeeze1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("fire7/relu_squeeze1x1"); + 1U, 1U, 48U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire7_squeeze1x1_w.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire7_squeeze1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("fire7/squeeze1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fire7/relu_squeeze1x1"); graph << get_expand_fire_node(data_path, "fire7", weights_layout, 192U, 192U).set_name("fire7/concat"); graph << ConvolutionLayer( - 1U, 1U, 64U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire8_squeeze1x1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire8_squeeze1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("fire8/squeeze1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("fire8/relu_squeeze1x1"); + 1U, 1U, 64U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire8_squeeze1x1_w.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire8_squeeze1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("fire8/squeeze1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fire8/relu_squeeze1x1"); graph << get_expand_fire_node(data_path, "fire8", weights_layout, 256U, 256U).set_name("fire8/concat"); - graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("pool8") + graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + .set_name("pool8") << ConvolutionLayer( - 1U, 1U, 64U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire9_squeeze1x1_w.npy", weights_layout), - get_weights_accessor(data_path, 
"/cnn_data/squeezenet_v1.0_model/fire9_squeeze1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("fire9/squeeze1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("fire9/relu_squeeze1x1"); + 1U, 1U, 64U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire9_squeeze1x1_w.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/fire9_squeeze1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("fire9/squeeze1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fire9/relu_squeeze1x1"); graph << get_expand_fire_node(data_path, "fire9", weights_layout, 256U, 256U).set_name("fire9/concat"); graph << ConvolutionLayer( - 1U, 1U, 1000U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv10_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv10_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv10") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu_conv10") + 1U, 1U, 1000U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv10_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1.0_model/conv10_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("conv10") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("relu_conv10") << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, operation_layout)).set_name("pool10") - << FlattenLayer().set_name("flatten") - << SoftmaxLayer().set_name("prob") + << FlattenLayer().set_name("flatten") << SoftmaxLayer().set_name("prob") << OutputLayer(get_output_accessor(common_params, 5)); // Finalize graph GraphConfig config; - config.num_threads = common_params.threads; - config.use_tuner = common_params.enable_tuner; - config.tuner_mode = common_params.tuner_mode; - config.tuner_file = common_params.tuner_file; - config.convert_to_uint8 = (common_params.data_type == DataType::QASYMM8); + config.num_threads = common_params.threads; + config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; + config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; + config.use_synthetic_type = arm_compute::is_data_type_quantized(common_params.data_type); + config.synthetic_type = common_params.data_type; graph.finalize(common_params.target, config); @@ -186,27 +212,30 @@ private: CommonGraphParams common_params; Stream graph; - ConcatLayer get_expand_fire_node(const std::string &data_path, std::string &¶m_path, DataLayout weights_layout, - unsigned int expand1_filt, unsigned int expand3_filt) + ConcatLayer get_expand_fire_node(const std::string &data_path, + std::string &¶m_path, + DataLayout weights_layout, + unsigned int expand1_filt, + unsigned int expand3_filt) { std::string total_path = "/cnn_data/squeezenet_v1.0_model/" + param_path + "_"; SubStream i_a(graph); - i_a << ConvolutionLayer( - 1U, 1U, expand1_filt, - get_weights_accessor(data_path, total_path + "expand1x1_w.npy", weights_layout), - get_weights_accessor(data_path, total_path + "expand1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/expand1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/relu_expand1x1"); + i_a << ConvolutionLayer(1U, 1U, expand1_filt, + get_weights_accessor(data_path, 
total_path + "expand1x1_w.npy", weights_layout), + get_weights_accessor(data_path, total_path + "expand1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/expand1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/relu_expand1x1"); SubStream i_b(graph); - i_b << ConvolutionLayer( - 3U, 3U, expand3_filt, - get_weights_accessor(data_path, total_path + "expand3x3_w.npy", weights_layout), - get_weights_accessor(data_path, total_path + "expand3x3_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name(param_path + "/expand3x3") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/relu_expand3x3"); + i_b << ConvolutionLayer(3U, 3U, expand3_filt, + get_weights_accessor(data_path, total_path + "expand3x3_w.npy", weights_layout), + get_weights_accessor(data_path, total_path + "expand3x3_b.npy"), + PadStrideInfo(1, 1, 1, 1)) + .set_name(param_path + "/expand3x3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/relu_expand3x3"); return ConcatLayer(std::move(i_a), std::move(i_b)); } diff --git a/examples/graph_squeezenet_v1_1.cpp b/examples/graph_squeezenet_v1_1.cpp index f648b6337d..ed0f692db2 100644 --- a/examples/graph_squeezenet_v1_1.cpp +++ b/examples/graph_squeezenet_v1_1.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. */ #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -35,8 +36,7 @@ using namespace arm_compute::graph_utils; class GraphSqueezenet_v1_1Example : public Example { public: - GraphSqueezenet_v1_1Example() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "SqueezeNetV1.1") + GraphSqueezenet_v1_1Example() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "SqueezeNetV1.1") { } bool do_setup(int argc, char **argv) override @@ -49,7 +49,7 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; @@ -62,113 +62,139 @@ public: std::string data_path = common_params.data_path; // Create a preprocessor object - const std::array<float, 3> mean_rgb{ { 122.68f, 116.67f, 104.01f } }; - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb); + const std::array<float, 3> mean_rgb{{122.68f, 116.67f, 104.01f}}; + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<CaffePreproccessor>(mean_rgb); // Create input descriptor const auto operation_layout = common_params.data_layout; - const TensorShape tensor_shape = permute_shape(TensorShape(227U, 227U, 3U, 1U), DataLayout::NCHW, operation_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(227U, 227U, 3U, common_params.batches), DataLayout::NCHW, operation_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NCHW; - graph << common_params.target - << common_params.fast_math_hint + graph 
<< common_params.target << common_params.fast_math_hint << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor))) << ConvolutionLayer( - 3U, 3U, 64U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv1_b.npy"), - PadStrideInfo(2, 2, 0, 0)) - .set_name("conv1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu_conv1") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("pool1") + 3U, 3U, 64U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv1_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv1_b.npy"), + PadStrideInfo(2, 2, 0, 0)) + .set_name("conv1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("relu_conv1") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + .set_name("pool1") << ConvolutionLayer( - 1U, 1U, 16U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire2_squeeze1x1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire2_squeeze1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("fire2/squeeze1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("fire2/relu_squeeze1x1"); + 1U, 1U, 16U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire2_squeeze1x1_w.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire2_squeeze1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("fire2/squeeze1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fire2/relu_squeeze1x1"); graph << get_expand_fire_node(data_path, "fire2", weights_layout, 64U, 64U).set_name("fire2/concat"); graph << ConvolutionLayer( - 1U, 1U, 16U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire3_squeeze1x1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire3_squeeze1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("fire3/squeeze1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("fire3/relu_squeeze1x1"); + 1U, 1U, 16U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire3_squeeze1x1_w.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire3_squeeze1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("fire3/squeeze1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fire3/relu_squeeze1x1"); graph << get_expand_fire_node(data_path, "fire3", weights_layout, 64U, 64U).set_name("fire3/concat"); - graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("pool3") + graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + .set_name("pool3") << ConvolutionLayer( - 1U, 1U, 32U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire4_squeeze1x1_w.npy", weights_layout), - get_weights_accessor(data_path, 
"/cnn_data/squeezenet_v1_1_model/fire4_squeeze1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("fire4/squeeze1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("fire4/relu_squeeze1x1"); + 1U, 1U, 32U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire4_squeeze1x1_w.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire4_squeeze1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("fire4/squeeze1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fire4/relu_squeeze1x1"); graph << get_expand_fire_node(data_path, "fire4", weights_layout, 128U, 128U).set_name("fire4/concat"); graph << ConvolutionLayer( - 1U, 1U, 32U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire5_squeeze1x1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire5_squeeze1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("fire5/squeeze1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("fire5/relu_squeeze1x1"); + 1U, 1U, 32U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire5_squeeze1x1_w.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire5_squeeze1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("fire5/squeeze1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fire5/relu_squeeze1x1"); graph << get_expand_fire_node(data_path, "fire5", weights_layout, 128U, 128U).set_name("fire5/concat"); - graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))).set_name("pool5") + graph << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 3, operation_layout, + PadStrideInfo(2, 2, 0, 0, DimensionRoundingType::CEIL))) + .set_name("pool5") << ConvolutionLayer( - 1U, 1U, 48U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire6_squeeze1x1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire6_squeeze1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("fire6/squeeze1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("fire6/relu_squeeze1x1"); + 1U, 1U, 48U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire6_squeeze1x1_w.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire6_squeeze1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("fire6/squeeze1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fire6/relu_squeeze1x1"); graph << get_expand_fire_node(data_path, "fire6", weights_layout, 192U, 192U).set_name("fire6/concat"); graph << ConvolutionLayer( - 1U, 1U, 48U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire7_squeeze1x1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire7_squeeze1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("fire7/squeeze1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("fire7/relu_squeeze1x1"); + 1U, 1U, 48U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire7_squeeze1x1_w.npy", + weights_layout), + get_weights_accessor(data_path, 
"/cnn_data/squeezenet_v1_1_model/fire7_squeeze1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("fire7/squeeze1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fire7/relu_squeeze1x1"); graph << get_expand_fire_node(data_path, "fire7", weights_layout, 192U, 192U).set_name("fire7/concat"); graph << ConvolutionLayer( - 1U, 1U, 64U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire8_squeeze1x1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire8_squeeze1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("fire8/squeeze1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("fire8/relu_squeeze1x1"); + 1U, 1U, 64U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire8_squeeze1x1_w.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire8_squeeze1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("fire8/squeeze1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fire8/relu_squeeze1x1"); graph << get_expand_fire_node(data_path, "fire8", weights_layout, 256U, 256U).set_name("fire8/concat"); graph << ConvolutionLayer( - 1U, 1U, 64U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire9_squeeze1x1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire9_squeeze1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("fire9/squeeze1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("fire9/relu_squeeze1x1"); + 1U, 1U, 64U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire9_squeeze1x1_w.npy", + weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/fire9_squeeze1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("fire9/squeeze1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("fire9/relu_squeeze1x1"); graph << get_expand_fire_node(data_path, "fire9", weights_layout, 256U, 256U).set_name("fire9/concat"); graph << ConvolutionLayer( - 1U, 1U, 1000U, - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv10_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv10_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv10") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("relu_conv10") + 1U, 1U, 1000U, + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv10_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/squeezenet_v1_1_model/conv10_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name("conv10") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("relu_conv10") << PoolingLayer(PoolingLayerInfo(PoolingType::AVG, operation_layout)).set_name("pool10") - << FlattenLayer().set_name("flatten") - << SoftmaxLayer().set_name("prob") + << FlattenLayer().set_name("flatten") << SoftmaxLayer().set_name("prob") << OutputLayer(get_output_accessor(common_params, 5)); // Finalize graph GraphConfig config; - config.num_threads = common_params.threads; - config.use_tuner = common_params.enable_tuner; - config.tuner_mode = common_params.tuner_mode; - config.tuner_file = common_params.tuner_file; - config.convert_to_uint8 = 
(common_params.data_type == DataType::QASYMM8); + config.num_threads = common_params.threads; + config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; + config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; + config.use_synthetic_type = arm_compute::is_data_type_quantized(common_params.data_type); + config.synthetic_type = common_params.data_type; graph.finalize(common_params.target, config); @@ -186,27 +212,30 @@ private: CommonGraphParams common_params; Stream graph; - ConcatLayer get_expand_fire_node(const std::string &data_path, std::string &¶m_path, DataLayout weights_layout, - unsigned int expand1_filt, unsigned int expand3_filt) + ConcatLayer get_expand_fire_node(const std::string &data_path, + std::string &¶m_path, + DataLayout weights_layout, + unsigned int expand1_filt, + unsigned int expand3_filt) { std::string total_path = "/cnn_data/squeezenet_v1_1_model/" + param_path + "_"; SubStream i_a(graph); - i_a << ConvolutionLayer( - 1U, 1U, expand1_filt, - get_weights_accessor(data_path, total_path + "expand1x1_w.npy", weights_layout), - get_weights_accessor(data_path, total_path + "expand1x1_b.npy"), - PadStrideInfo(1, 1, 0, 0)) - .set_name(param_path + "/expand1x1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/relu_expand1x1"); + i_a << ConvolutionLayer(1U, 1U, expand1_filt, + get_weights_accessor(data_path, total_path + "expand1x1_w.npy", weights_layout), + get_weights_accessor(data_path, total_path + "expand1x1_b.npy"), + PadStrideInfo(1, 1, 0, 0)) + .set_name(param_path + "/expand1x1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/relu_expand1x1"); SubStream i_b(graph); - i_b << ConvolutionLayer( - 3U, 3U, expand3_filt, - get_weights_accessor(data_path, total_path + "expand3x3_w.npy", weights_layout), - get_weights_accessor(data_path, total_path + "expand3x3_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name(param_path + "/expand3x3") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "/relu_expand3x3"); + i_b << ConvolutionLayer(3U, 3U, expand3_filt, + get_weights_accessor(data_path, total_path + "expand3x3_w.npy", weights_layout), + get_weights_accessor(data_path, total_path + "expand3x3_b.npy"), + PadStrideInfo(1, 1, 1, 1)) + .set_name(param_path + "/expand3x3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "/relu_expand3x3"); return ConcatLayer(std::move(i_a), std::move(i_b)); } diff --git a/examples/graph_srcnn955.cpp b/examples/graph_srcnn955.cpp index 18921065d7..15a8b5d8ec 100644 --- a/examples/graph_srcnn955.cpp +++ b/examples/graph_srcnn955.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. 
*/ #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -36,7 +37,12 @@ class GraphSRCNN955Example : public Example { public: GraphSRCNN955Example() - : cmd_parser(), common_opts(cmd_parser), model_input_width(nullptr), model_input_height(nullptr), common_params(), graph(0, "SRCNN955") + : cmd_parser(), + common_opts(cmd_parser), + model_input_width(nullptr), + model_input_height(nullptr), + common_params(), + graph(0, "SRCNN955") { model_input_width = cmd_parser.add_option<SimpleOption<unsigned int>>("image-width", 300); model_input_height = cmd_parser.add_option<SimpleOption<unsigned int>>("image-height", 300); @@ -45,7 +51,7 @@ public: model_input_width->set_help("Input image width."); model_input_height->set_help("Input image height."); } - GraphSRCNN955Example(const GraphSRCNN955Example &) = delete; + GraphSRCNN955Example(const GraphSRCNN955Example &) = delete; GraphSRCNN955Example &operator=(const GraphSRCNN955Example &) = delete; ~GraphSRCNN955Example() override = default; bool do_setup(int argc, char **argv) override @@ -58,7 +64,7 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; @@ -78,48 +84,47 @@ public: const std::string model_path = "/cnn_data/srcnn955_model/"; // Create a preprocessor object - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<TFPreproccessor>(); + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<TFPreproccessor>(); // Create input descriptor - const TensorShape tensor_shape = permute_shape(TensorShape(image_width, image_height, 3U, 1U), DataLayout::NCHW, common_params.data_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(image_width, image_height, 3U, common_params.batches), DataLayout::NCHW, + common_params.data_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NCHW; - graph << common_params.target - << common_params.fast_math_hint - << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), false /* Do not convert to BGR */)) - << ConvolutionLayer( - 9U, 9U, 64U, - get_weights_accessor(data_path, "conv1_weights.npy", weights_layout), - get_weights_accessor(data_path, "conv1_biases.npy"), - PadStrideInfo(1, 1, 4, 4)) - .set_name("conv1/convolution") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1/Relu") - << ConvolutionLayer( - 5U, 5U, 32U, - get_weights_accessor(data_path, "conv2_weights.npy", weights_layout), - get_weights_accessor(data_path, "conv2_biases.npy"), - PadStrideInfo(1, 1, 2, 2)) - .set_name("conv2/convolution") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv2/Relu") - << ConvolutionLayer( - 5U, 5U, 3U, - get_weights_accessor(data_path, "conv3_weights.npy", weights_layout), - get_weights_accessor(data_path, "conv3_biases.npy"), - PadStrideInfo(1, 1, 2, 2)) - .set_name("conv3/convolution") - << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv3/Relu") - << OutputLayer(arm_compute::support::cpp14::make_unique<DummyAccessor>(0)); + graph << common_params.target << common_params.fast_math_hint + << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), + false /* Do not convert to BGR */)) + << ConvolutionLayer(9U, 9U, 64U, get_weights_accessor(data_path, "conv1_weights.npy", weights_layout), + get_weights_accessor(data_path, "conv1_biases.npy"), PadStrideInfo(1, 1, 4, 4)) + .set_name("conv1/convolution") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv1/Relu") + << ConvolutionLayer(5U, 5U, 32U, get_weights_accessor(data_path, "conv2_weights.npy", weights_layout), + get_weights_accessor(data_path, "conv2_biases.npy"), PadStrideInfo(1, 1, 2, 2)) + .set_name("conv2/convolution") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv2/Relu") + << ConvolutionLayer(5U, 5U, 3U, get_weights_accessor(data_path, "conv3_weights.npy", weights_layout), + get_weights_accessor(data_path, "conv3_biases.npy"), PadStrideInfo(1, 1, 2, 2)) + .set_name("conv3/convolution") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv3/Relu") + << OutputLayer(std::make_unique<DummyAccessor>(0)); // Finalize graph GraphConfig config; - config.num_threads = common_params.threads; - config.use_tuner = common_params.enable_tuner; - config.tuner_mode = common_params.tuner_mode; - config.tuner_file = common_params.tuner_file; - config.convert_to_uint8 = (common_params.data_type == DataType::QASYMM8); + config.num_threads = common_params.threads; + config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; + config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; + config.use_synthetic_type = arm_compute::is_data_type_quantized(common_params.data_type); + config.synthetic_type = common_params.data_type; graph.finalize(common_params.target, config); @@ -135,8 +140,8 @@ public: private: CommandLineParser cmd_parser; CommonGraphOptions common_opts; - SimpleOption<unsigned int> *model_input_width{ nullptr }; - SimpleOption<unsigned int> *model_input_height{ nullptr }; + SimpleOption<unsigned int> *model_input_width{nullptr}; + SimpleOption<unsigned int> *model_input_height{nullptr}; CommonGraphParams common_params; Stream graph; }; diff --git a/examples/graph_ssd_mobilenet.cpp b/examples/graph_ssd_mobilenet.cpp index f2a8b30bb2..6218d47dd6 100644 --- a/examples/graph_ssd_mobilenet.cpp +++ b/examples/graph_ssd_mobilenet.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,7 +21,9 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ +#include "arm_compute/core/utils/quantization/AsymmHelpers.h" #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -36,23 +38,26 @@ using namespace arm_compute::graph_utils; class GraphSSDMobilenetExample : public Example { public: - GraphSSDMobilenetExample() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "MobileNetSSD") + GraphSSDMobilenetExample() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "MobileNetSSD") { // Add topk option keep_topk_opt = cmd_parser.add_option<SimpleOption<int>>("topk", 100); keep_topk_opt->set_help("Top k detections results per image. Used for data type F32."); // Add output option detection_boxes_opt = cmd_parser.add_option<SimpleOption<std::string>>("detection_boxes_opt", ""); - detection_boxes_opt->set_help("Filename containing the reference values for the graph output detection_boxes. Used for data type QASYMM8."); + detection_boxes_opt->set_help("Filename containing the reference values for the graph output detection_boxes. " + "Used for data type QASYMM8."); detection_classes_opt = cmd_parser.add_option<SimpleOption<std::string>>("detection_classes_opt", ""); - detection_classes_opt->set_help("Filename containing the reference values for the output detection_classes. Used for data type QASYMM8."); + detection_classes_opt->set_help( + "Filename containing the reference values for the output detection_classes. Used for data type QASYMM8."); detection_scores_opt = cmd_parser.add_option<SimpleOption<std::string>>("detection_scores_opt", ""); - detection_scores_opt->set_help("Filename containing the reference values for the output detection_scores. Used for data type QASYMM8."); + detection_scores_opt->set_help( + "Filename containing the reference values for the output detection_scores. Used for data type QASYMM8."); num_detections_opt = cmd_parser.add_option<SimpleOption<std::string>>("num_detections_opt", ""); - num_detections_opt->set_help("Filename containing the reference values for the output num_detections. Used with datatype QASYMM8."); + num_detections_opt->set_help( + "Filename containing the reference values for the output num_detections. 
Used with datatype QASYMM8."); } - GraphSSDMobilenetExample(const GraphSSDMobilenetExample &) = delete; + GraphSSDMobilenetExample(const GraphSSDMobilenetExample &) = delete; GraphSSDMobilenetExample &operator=(const GraphSSDMobilenetExample &) = delete; ~GraphSSDMobilenetExample() override = default; bool do_setup(int argc, char **argv) override @@ -65,7 +70,7 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; @@ -75,15 +80,16 @@ public: std::cout << common_params << std::endl; // Create input descriptor - const TensorShape tensor_shape = permute_shape(TensorShape(300, 300, 3U, 1U), DataLayout::NCHW, common_params.data_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(300, 300, 3U, 1U), DataLayout::NCHW, common_params.data_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); // Set graph hints - graph << common_params.target - << common_params.fast_math_hint; + graph << common_params.target << common_params.fast_math_hint; // Create core graph - if(arm_compute::is_data_type_float(common_params.data_type)) + if (arm_compute::is_data_type_float(common_params.data_type)) { create_graph_float(input_descriptor); } @@ -97,6 +103,7 @@ public: config.num_threads = common_params.threads; config.use_tuner = common_params.enable_tuner; config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; graph.finalize(common_params.target, config); @@ -111,99 +118,98 @@ public: private: CommandLineParser cmd_parser; CommonGraphOptions common_opts; - SimpleOption<int> *keep_topk_opt{ nullptr }; + SimpleOption<int> *keep_topk_opt{nullptr}; CommonGraphParams common_params; Stream graph; - SimpleOption<std::string> *detection_boxes_opt{ nullptr }; - SimpleOption<std::string> *detection_classes_opt{ nullptr }; - SimpleOption<std::string> *detection_scores_opt{ nullptr }; - SimpleOption<std::string> *num_detections_opt{ nullptr }; - - ConcatLayer get_node_A_float(IStream &master_graph, const std::string &data_path, std::string &¶m_path, - unsigned int conv_filt, - PadStrideInfo dwc_pad_stride_info, PadStrideInfo conv_pad_stride_info) + SimpleOption<std::string> *detection_boxes_opt{nullptr}; + SimpleOption<std::string> *detection_classes_opt{nullptr}; + SimpleOption<std::string> *detection_scores_opt{nullptr}; + SimpleOption<std::string> *num_detections_opt{nullptr}; + + ConcatLayer get_node_A_float(IStream &main_graph, + const std::string &data_path, + std::string &¶m_path, + unsigned int conv_filt, + PadStrideInfo dwc_pad_stride_info, + PadStrideInfo conv_pad_stride_info) { const std::string total_path = param_path + "_"; - SubStream sg(master_graph); - - sg << DepthwiseConvolutionLayer( - 3U, 3U, - get_weights_accessor(data_path, total_path + "dw_w.npy"), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - dwc_pad_stride_info) - .set_name(param_path + "/dw") + SubStream sg(main_graph); + + sg << DepthwiseConvolutionLayer(3U, 3U, get_weights_accessor(data_path, total_path + "dw_w.npy"), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + dwc_pad_stride_info) + .set_name(param_path + "/dw") << BatchNormalizationLayer(get_weights_accessor(data_path, 
total_path + "dw_bn_mean.npy"), get_weights_accessor(data_path, total_path + "dw_bn_var.npy"), get_weights_accessor(data_path, total_path + "dw_scale_w.npy"), get_weights_accessor(data_path, total_path + "dw_scale_b.npy"), 0.00001f) - .set_name(param_path + "/dw/bn") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "dw/relu") - - << ConvolutionLayer( - 1U, 1U, conv_filt, - get_weights_accessor(data_path, total_path + "w.npy"), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - conv_pad_stride_info) - .set_name(param_path + "/pw") + .set_name(param_path + "/dw/bn") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "dw/relu") + + << ConvolutionLayer(1U, 1U, conv_filt, get_weights_accessor(data_path, total_path + "w.npy"), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), conv_pad_stride_info) + .set_name(param_path + "/pw") << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "bn_mean.npy"), get_weights_accessor(data_path, total_path + "bn_var.npy"), get_weights_accessor(data_path, total_path + "scale_w.npy"), get_weights_accessor(data_path, total_path + "scale_b.npy"), 0.00001f) - .set_name(param_path + "/pw/bn") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(param_path + "pw/relu"); + .set_name(param_path + "/pw/bn") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(param_path + "pw/relu"); return ConcatLayer(std::move(sg)); } - ConcatLayer get_node_B_float(IStream &master_graph, const std::string &data_path, std::string &¶m_path, - unsigned int conv_filt, - PadStrideInfo conv_pad_stride_info_1, PadStrideInfo conv_pad_stride_info_2) + ConcatLayer get_node_B_float(IStream &main_graph, + const std::string &data_path, + std::string &¶m_path, + unsigned int conv_filt, + PadStrideInfo conv_pad_stride_info_1, + PadStrideInfo conv_pad_stride_info_2) { const std::string total_path = param_path + "_"; - SubStream sg(master_graph); - - sg << ConvolutionLayer( - 1, 1, conv_filt / 2, - get_weights_accessor(data_path, total_path + "1_w.npy"), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - conv_pad_stride_info_1) - .set_name(total_path + "1/conv") + SubStream sg(main_graph); + + sg << ConvolutionLayer(1, 1, conv_filt / 2, get_weights_accessor(data_path, total_path + "1_w.npy"), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), conv_pad_stride_info_1) + .set_name(total_path + "1/conv") << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "1_bn_mean.npy"), get_weights_accessor(data_path, total_path + "1_bn_var.npy"), get_weights_accessor(data_path, total_path + "1_scale_w.npy"), get_weights_accessor(data_path, total_path + "1_scale_b.npy"), 0.00001f) - .set_name(total_path + "1/bn") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(total_path + "1/relu"); - - sg << ConvolutionLayer( - 3, 3, conv_filt, - get_weights_accessor(data_path, total_path + "2_w.npy"), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - conv_pad_stride_info_2) - .set_name(total_path + "2/conv") + .set_name(total_path + "1/bn") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(total_path + "1/relu"); + + sg << ConvolutionLayer(3, 3, conv_filt, get_weights_accessor(data_path, total_path + 
"2_w.npy"), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), conv_pad_stride_info_2) + .set_name(total_path + "2/conv") << BatchNormalizationLayer(get_weights_accessor(data_path, total_path + "2_bn_mean.npy"), get_weights_accessor(data_path, total_path + "2_bn_var.npy"), get_weights_accessor(data_path, total_path + "2_scale_w.npy"), get_weights_accessor(data_path, total_path + "2_scale_b.npy"), 0.00001f) - .set_name(total_path + "2/bn") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(total_path + "2/relu"); + .set_name(total_path + "2/bn") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(total_path + "2/relu"); return ConcatLayer(std::move(sg)); } - ConcatLayer get_node_C_float(IStream &master_graph, const std::string &data_path, std::string &¶m_path, - unsigned int conv_filt, PadStrideInfo conv_pad_stride_info) + ConcatLayer get_node_C_float(IStream &main_graph, + const std::string &data_path, + std::string &¶m_path, + unsigned int conv_filt, + PadStrideInfo conv_pad_stride_info) { const std::string total_path = param_path + "_"; - SubStream sg(master_graph); - sg << ConvolutionLayer( - 1U, 1U, conv_filt, - get_weights_accessor(data_path, total_path + "w.npy"), - get_weights_accessor(data_path, total_path + "b.npy"), - conv_pad_stride_info) - .set_name(param_path + "/conv"); - if(common_params.data_layout == DataLayout::NCHW) + SubStream sg(main_graph); + sg << ConvolutionLayer(1U, 1U, conv_filt, get_weights_accessor(data_path, total_path + "w.npy"), + get_weights_accessor(data_path, total_path + "b.npy"), conv_pad_stride_info) + .set_name(param_path + "/conv"); + if (common_params.data_layout == DataLayout::NCHW) { sg << PermuteLayer(PermutationVector(2U, 0U, 1U), DataLayout::NHWC).set_name(param_path + "/perm"); } @@ -215,62 +221,77 @@ private: void create_graph_float(TensorDescriptor &input_descriptor) { // Create a preprocessor object - const std::array<float, 3> mean_rgb{ { 127.5f, 127.5f, 127.5f } }; - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb, true, 0.007843f); + const std::array<float, 3> mean_rgb{{127.5f, 127.5f, 127.5f}}; + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<CaffePreproccessor>(mean_rgb, true, 0.007843f); // Get trainable parameters data path std::string data_path = common_params.data_path; // Add model path to data path - if(!data_path.empty()) + if (!data_path.empty()) { data_path += "/cnn_data/ssd_mobilenet_model/"; } - graph << InputLayer(input_descriptor, - get_input_accessor(common_params, std::move(preprocessor))); + graph << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor))); SubStream conv_11(graph); - conv_11 << ConvolutionLayer( - 3U, 3U, 32U, - get_weights_accessor(data_path, "conv0_w.npy"), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 1, 1)) - .set_name("conv0"); + conv_11 << ConvolutionLayer(3U, 3U, 32U, get_weights_accessor(data_path, "conv0_w.npy"), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), + PadStrideInfo(2, 2, 1, 1)) + .set_name("conv0"); conv_11 << BatchNormalizationLayer(get_weights_accessor(data_path, "conv0_bn_mean.npy"), get_weights_accessor(data_path, "conv0_bn_var.npy"), get_weights_accessor(data_path, "conv0_scale_w.npy"), get_weights_accessor(data_path, "conv0_scale_b.npy"), 0.00001f) - .set_name("conv0/bn") - << 
ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv0/relu"); - - conv_11 << get_node_A_float(conv_11, data_path, "conv1", 64, PadStrideInfo(1, 1, 1, 1), PadStrideInfo(1, 1, 0, 0)); - conv_11 << get_node_A_float(conv_11, data_path, "conv2", 128, PadStrideInfo(2, 2, 1, 1), PadStrideInfo(1, 1, 0, 0)); - conv_11 << get_node_A_float(conv_11, data_path, "conv3", 128, PadStrideInfo(1, 1, 1, 1), PadStrideInfo(1, 1, 0, 0)); - conv_11 << get_node_A_float(conv_11, data_path, "conv4", 256, PadStrideInfo(2, 2, 1, 1), PadStrideInfo(1, 1, 0, 0)); - conv_11 << get_node_A_float(conv_11, data_path, "conv5", 256, PadStrideInfo(1, 1, 1, 1), PadStrideInfo(1, 1, 0, 0)); - conv_11 << get_node_A_float(conv_11, data_path, "conv6", 512, PadStrideInfo(2, 2, 1, 1), PadStrideInfo(1, 1, 0, 0)); - conv_11 << get_node_A_float(conv_11, data_path, "conv7", 512, PadStrideInfo(1, 1, 1, 1), PadStrideInfo(1, 1, 0, 0)); - conv_11 << get_node_A_float(conv_11, data_path, "conv8", 512, PadStrideInfo(1, 1, 1, 1), PadStrideInfo(1, 1, 0, 0)); - conv_11 << get_node_A_float(conv_11, data_path, "conv9", 512, PadStrideInfo(1, 1, 1, 1), PadStrideInfo(1, 1, 0, 0)); - conv_11 << get_node_A_float(conv_11, data_path, "conv10", 512, PadStrideInfo(1, 1, 1, 1), PadStrideInfo(1, 1, 0, 0)); - conv_11 << get_node_A_float(conv_11, data_path, "conv11", 512, PadStrideInfo(1, 1, 1, 1), PadStrideInfo(1, 1, 0, 0)); + .set_name("conv0/bn") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv0/relu"); + + conv_11 << get_node_A_float(conv_11, data_path, "conv1", 64, PadStrideInfo(1, 1, 1, 1), + PadStrideInfo(1, 1, 0, 0)); + conv_11 << get_node_A_float(conv_11, data_path, "conv2", 128, PadStrideInfo(2, 2, 1, 1), + PadStrideInfo(1, 1, 0, 0)); + conv_11 << get_node_A_float(conv_11, data_path, "conv3", 128, PadStrideInfo(1, 1, 1, 1), + PadStrideInfo(1, 1, 0, 0)); + conv_11 << get_node_A_float(conv_11, data_path, "conv4", 256, PadStrideInfo(2, 2, 1, 1), + PadStrideInfo(1, 1, 0, 0)); + conv_11 << get_node_A_float(conv_11, data_path, "conv5", 256, PadStrideInfo(1, 1, 1, 1), + PadStrideInfo(1, 1, 0, 0)); + conv_11 << get_node_A_float(conv_11, data_path, "conv6", 512, PadStrideInfo(2, 2, 1, 1), + PadStrideInfo(1, 1, 0, 0)); + conv_11 << get_node_A_float(conv_11, data_path, "conv7", 512, PadStrideInfo(1, 1, 1, 1), + PadStrideInfo(1, 1, 0, 0)); + conv_11 << get_node_A_float(conv_11, data_path, "conv8", 512, PadStrideInfo(1, 1, 1, 1), + PadStrideInfo(1, 1, 0, 0)); + conv_11 << get_node_A_float(conv_11, data_path, "conv9", 512, PadStrideInfo(1, 1, 1, 1), + PadStrideInfo(1, 1, 0, 0)); + conv_11 << get_node_A_float(conv_11, data_path, "conv10", 512, PadStrideInfo(1, 1, 1, 1), + PadStrideInfo(1, 1, 0, 0)); + conv_11 << get_node_A_float(conv_11, data_path, "conv11", 512, PadStrideInfo(1, 1, 1, 1), + PadStrideInfo(1, 1, 0, 0)); SubStream conv_13(conv_11); - conv_13 << get_node_A_float(conv_11, data_path, "conv12", 1024, PadStrideInfo(2, 2, 1, 1), PadStrideInfo(1, 1, 0, 0)); - conv_13 << get_node_A_float(conv_13, data_path, "conv13", 1024, PadStrideInfo(1, 1, 1, 1), PadStrideInfo(1, 1, 0, 0)); + conv_13 << get_node_A_float(conv_11, data_path, "conv12", 1024, PadStrideInfo(2, 2, 1, 1), + PadStrideInfo(1, 1, 0, 0)); + conv_13 << get_node_A_float(conv_13, data_path, "conv13", 1024, PadStrideInfo(1, 1, 1, 1), + PadStrideInfo(1, 1, 0, 0)); SubStream conv_14(conv_13); - conv_14 << get_node_B_float(conv_13, data_path, "conv14", 512, PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 2, 1, 
1)); + conv_14 << get_node_B_float(conv_13, data_path, "conv14", 512, PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(2, 2, 1, 1)); SubStream conv_15(conv_14); - conv_15 << get_node_B_float(conv_14, data_path, "conv15", 256, PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 2, 1, 1)); + conv_15 << get_node_B_float(conv_14, data_path, "conv15", 256, PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(2, 2, 1, 1)); SubStream conv_16(conv_15); - conv_16 << get_node_B_float(conv_15, data_path, "conv16", 256, PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 2, 1, 1)); + conv_16 << get_node_B_float(conv_15, data_path, "conv16", 256, PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(2, 2, 1, 1)); SubStream conv_17(conv_16); - conv_17 << get_node_B_float(conv_16, data_path, "conv17", 128, PadStrideInfo(1, 1, 0, 0), PadStrideInfo(2, 2, 1, 1)); + conv_17 << get_node_B_float(conv_16, data_path, "conv17", 128, PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(2, 2, 1, 1)); //mbox_loc SubStream conv_11_mbox_loc(conv_11); @@ -292,8 +313,9 @@ private: conv_17_2_mbox_loc << get_node_C_float(conv_17, data_path, "conv17_2_mbox_loc", 24, PadStrideInfo(1, 1, 0, 0)); SubStream mbox_loc(graph); - mbox_loc << ConcatLayer(std::move(conv_11_mbox_loc), std::move(conv_13_mbox_loc), conv_14_2_mbox_loc, std::move(conv_15_2_mbox_loc), - std::move(conv_16_2_mbox_loc), std::move(conv_17_2_mbox_loc)); + mbox_loc << ConcatLayer(std::move(conv_11_mbox_loc), std::move(conv_13_mbox_loc), conv_14_2_mbox_loc, + std::move(conv_15_2_mbox_loc), std::move(conv_16_2_mbox_loc), + std::move(conv_17_2_mbox_loc)); //mbox_conf SubStream conv_11_mbox_conf(conv_11); @@ -303,67 +325,79 @@ private: conv_13_mbox_conf << get_node_C_float(conv_13, data_path, "conv13_mbox_conf", 126, PadStrideInfo(1, 1, 0, 0)); SubStream conv_14_2_mbox_conf(conv_14); - conv_14_2_mbox_conf << get_node_C_float(conv_14, data_path, "conv14_2_mbox_conf", 126, PadStrideInfo(1, 1, 0, 0)); + conv_14_2_mbox_conf << get_node_C_float(conv_14, data_path, "conv14_2_mbox_conf", 126, + PadStrideInfo(1, 1, 0, 0)); SubStream conv_15_2_mbox_conf(conv_15); - conv_15_2_mbox_conf << get_node_C_float(conv_15, data_path, "conv15_2_mbox_conf", 126, PadStrideInfo(1, 1, 0, 0)); + conv_15_2_mbox_conf << get_node_C_float(conv_15, data_path, "conv15_2_mbox_conf", 126, + PadStrideInfo(1, 1, 0, 0)); SubStream conv_16_2_mbox_conf(conv_16); - conv_16_2_mbox_conf << get_node_C_float(conv_16, data_path, "conv16_2_mbox_conf", 126, PadStrideInfo(1, 1, 0, 0)); + conv_16_2_mbox_conf << get_node_C_float(conv_16, data_path, "conv16_2_mbox_conf", 126, + PadStrideInfo(1, 1, 0, 0)); SubStream conv_17_2_mbox_conf(conv_17); - conv_17_2_mbox_conf << get_node_C_float(conv_17, data_path, "conv17_2_mbox_conf", 126, PadStrideInfo(1, 1, 0, 0)); + conv_17_2_mbox_conf << get_node_C_float(conv_17, data_path, "conv17_2_mbox_conf", 126, + PadStrideInfo(1, 1, 0, 0)); SubStream mbox_conf(graph); - mbox_conf << ConcatLayer(std::move(conv_11_mbox_conf), std::move(conv_13_mbox_conf), std::move(conv_14_2_mbox_conf), - std::move(conv_15_2_mbox_conf), std::move(conv_16_2_mbox_conf), std::move(conv_17_2_mbox_conf)); + mbox_conf << ConcatLayer(std::move(conv_11_mbox_conf), std::move(conv_13_mbox_conf), + std::move(conv_14_2_mbox_conf), std::move(conv_15_2_mbox_conf), + std::move(conv_16_2_mbox_conf), std::move(conv_17_2_mbox_conf)); mbox_conf << ReshapeLayer(TensorShape(21U, 1917U)).set_name("mbox_conf/reshape"); mbox_conf << SoftmaxLayer().set_name("mbox_conf/softmax"); mbox_conf << FlattenLayer().set_name("mbox_conf/flat"); - const std::vector<float> 
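// --- Note (a sketch, not part of the patch) on the two PadStrideInfo overloads
// used throughout these hunks; argument roles follow the arm_compute API:
PadStrideInfo sym_pad(1, 1, 1, 1);                   // stride_x, stride_y, pad_x, pad_y
PadStrideInfo asym_pad(2U, 2U,                       // stride_x, stride_y
                       0U, 1U, 0U, 1U,               // pad_left, pad_right, pad_top, pad_bottom
                       DimensionRoundingType::CEIL); // output-dimension rounding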
priorbox_variances = { 0.1f, 0.1f, 0.2f, 0.2f }; + const std::vector<float> priorbox_variances = {0.1f, 0.1f, 0.2f, 0.2f}; const float priorbox_offset = 0.5f; - const std::vector<float> priorbox_aspect_ratios = { 2.f, 3.f }; + const std::vector<float> priorbox_aspect_ratios = {2.f, 3.f}; //mbox_priorbox branch SubStream conv_11_mbox_priorbox(conv_11); conv_11_mbox_priorbox << PriorBoxLayer(SubStream(graph), - PriorBoxLayerInfo({ 60.f }, priorbox_variances, priorbox_offset, true, false, {}, { 2.f })) - .set_name("conv11/priorbox"); + PriorBoxLayerInfo({60.f}, priorbox_variances, priorbox_offset, true, + false, {}, {2.f})) + .set_name("conv11/priorbox"); SubStream conv_13_mbox_priorbox(conv_13); conv_13_mbox_priorbox << PriorBoxLayer(SubStream(graph), - PriorBoxLayerInfo({ 105.f }, priorbox_variances, priorbox_offset, true, false, { 150.f }, priorbox_aspect_ratios)) - .set_name("conv13/priorbox"); + PriorBoxLayerInfo({105.f}, priorbox_variances, priorbox_offset, true, + false, {150.f}, priorbox_aspect_ratios)) + .set_name("conv13/priorbox"); SubStream conv_14_2_mbox_priorbox(conv_14); conv_14_2_mbox_priorbox << PriorBoxLayer(SubStream(graph), - PriorBoxLayerInfo({ 150.f }, priorbox_variances, priorbox_offset, true, false, { 195.f }, priorbox_aspect_ratios)) - .set_name("conv14/priorbox"); + PriorBoxLayerInfo({150.f}, priorbox_variances, priorbox_offset, true, + false, {195.f}, priorbox_aspect_ratios)) + .set_name("conv14/priorbox"); SubStream conv_15_2_mbox_priorbox(conv_15); conv_15_2_mbox_priorbox << PriorBoxLayer(SubStream(graph), - PriorBoxLayerInfo({ 195.f }, priorbox_variances, priorbox_offset, true, false, { 240.f }, priorbox_aspect_ratios)) - .set_name("conv15/priorbox"); + PriorBoxLayerInfo({195.f}, priorbox_variances, priorbox_offset, true, + false, {240.f}, priorbox_aspect_ratios)) + .set_name("conv15/priorbox"); SubStream conv_16_2_mbox_priorbox(conv_16); conv_16_2_mbox_priorbox << PriorBoxLayer(SubStream(graph), - PriorBoxLayerInfo({ 240.f }, priorbox_variances, priorbox_offset, true, false, { 285.f }, priorbox_aspect_ratios)) - .set_name("conv16/priorbox"); + PriorBoxLayerInfo({240.f}, priorbox_variances, priorbox_offset, true, + false, {285.f}, priorbox_aspect_ratios)) + .set_name("conv16/priorbox"); SubStream conv_17_2_mbox_priorbox(conv_17); conv_17_2_mbox_priorbox << PriorBoxLayer(SubStream(graph), - PriorBoxLayerInfo({ 285.f }, priorbox_variances, priorbox_offset, true, false, { 300.f }, priorbox_aspect_ratios)) - .set_name("conv17/priorbox"); + PriorBoxLayerInfo({285.f}, priorbox_variances, priorbox_offset, true, + false, {300.f}, priorbox_aspect_ratios)) + .set_name("conv17/priorbox"); SubStream mbox_priorbox(graph); mbox_priorbox << ConcatLayer( - (common_params.data_layout == DataLayout::NCHW) ? arm_compute::graph::descriptors::ConcatLayerDescriptor(DataLayoutDimension::WIDTH) : arm_compute::graph::descriptors::ConcatLayerDescriptor( - DataLayoutDimension::CHANNEL), - std::move(conv_11_mbox_priorbox), std::move(conv_13_mbox_priorbox), std::move(conv_14_2_mbox_priorbox), - std::move(conv_15_2_mbox_priorbox), std::move(conv_16_2_mbox_priorbox), std::move(conv_17_2_mbox_priorbox)); + (common_params.data_layout == DataLayout::NCHW) + ? 
arm_compute::graph::descriptors::ConcatLayerDescriptor(DataLayoutDimension::WIDTH) + : arm_compute::graph::descriptors::ConcatLayerDescriptor(DataLayoutDimension::CHANNEL), + std::move(conv_11_mbox_priorbox), std::move(conv_13_mbox_priorbox), std::move(conv_14_2_mbox_priorbox), + std::move(conv_15_2_mbox_priorbox), std::move(conv_16_2_mbox_priorbox), std::move(conv_17_2_mbox_priorbox)); const int num_classes = 21; const bool share_location = true; @@ -376,77 +410,85 @@ private: SubStream detection_ouput(mbox_loc); detection_ouput << DetectionOutputLayer(std::move(mbox_conf), std::move(mbox_priorbox), - DetectionOutputLayerInfo(num_classes, share_location, detection_type, keep_top_k, nms_threshold, top_k, label_id_background, conf_thrs)); - detection_ouput << OutputLayer(get_detection_output_accessor(common_params, { input_descriptor.shape })); + DetectionOutputLayerInfo(num_classes, share_location, detection_type, + keep_top_k, nms_threshold, top_k, + label_id_background, conf_thrs)); + detection_ouput << OutputLayer(get_detection_output_accessor(common_params, {input_descriptor.shape})); } - ConcatLayer get_node_A_qasymm(IStream &master_graph, const std::string &data_path, std::string &¶m_path, - unsigned int conv_filt, - PadStrideInfo dwc_pad_stride_info, PadStrideInfo conv_pad_stride_info, - std::pair<QuantizationInfo, QuantizationInfo> depth_quant_info, std::pair<QuantizationInfo, QuantizationInfo> point_quant_info) + ConcatLayer get_node_A_qasymm(IStream &main_graph, + const std::string &data_path, + std::string &¶m_path, + unsigned int conv_filt, + PadStrideInfo dwc_pad_stride_info, + PadStrideInfo conv_pad_stride_info, + std::pair<QuantizationInfo, QuantizationInfo> depth_quant_info, + std::pair<QuantizationInfo, QuantizationInfo> point_quant_info) { const std::string total_path = param_path + "_"; - SubStream sg(master_graph); - - sg << DepthwiseConvolutionLayer( - 3U, 3U, - get_weights_accessor(data_path, total_path + "dw_w.npy"), - get_weights_accessor(data_path, total_path + "dw_b.npy"), - dwc_pad_stride_info, 1, depth_quant_info.first, depth_quant_info.second) - .set_name(param_path + "/dw") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)).set_name(param_path + "/dw/relu6"); - - sg << ConvolutionLayer( - 1U, 1U, conv_filt, - get_weights_accessor(data_path, total_path + "w.npy"), - get_weights_accessor(data_path, total_path + "b.npy"), - conv_pad_stride_info, 1, point_quant_info.first, point_quant_info.second) - .set_name(param_path + "/pw") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)).set_name(param_path + "/pw/relu6"); + SubStream sg(main_graph); + + sg << DepthwiseConvolutionLayer(3U, 3U, get_weights_accessor(data_path, total_path + "dw_w.npy"), + get_weights_accessor(data_path, total_path + "dw_b.npy"), dwc_pad_stride_info, + 1, depth_quant_info.first, depth_quant_info.second) + .set_name(param_path + "/dw") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)) + .set_name(param_path + "/dw/relu6"); + + sg << ConvolutionLayer(1U, 1U, conv_filt, get_weights_accessor(data_path, total_path + "w.npy"), + get_weights_accessor(data_path, total_path + "b.npy"), conv_pad_stride_info, 1, + point_quant_info.first, point_quant_info.second) + .set_name(param_path + "/pw") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)) + .set_name(param_path + "/pw/relu6"); return 
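// --- Illustrative sketch: how the constants above feed DetectionOutputLayerInfo.
// num_classes and share_location are shown in this hunk; the remaining parameters
// (detection_type, keep_top_k, nms_threshold, top_k, label_id_background,
// conf_thrs) are defined in elided lines of the same example and are only
// referenced symbolically here. detection_info is a placeholder name.
DetectionOutputLayerInfo detection_info(num_classes, share_location, detection_type,
                                        keep_top_k, nms_threshold, top_k,
                                        label_id_background, conf_thrs);
detection_ouput << DetectionOutputLayer(std::move(mbox_conf), std::move(mbox_priorbox), detection_info);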
ConcatLayer(std::move(sg)); } - ConcatLayer get_node_B_qasymm(IStream &master_graph, const std::string &data_path, std::string &¶m_path, - unsigned int conv_filt, - PadStrideInfo conv_pad_stride_info_1x1, PadStrideInfo conv_pad_stride_info_3x3, - const std::pair<QuantizationInfo, QuantizationInfo> quant_info_1x1, const std::pair<QuantizationInfo, QuantizationInfo> quant_info_3x3) + ConcatLayer get_node_B_qasymm(IStream &main_graph, + const std::string &data_path, + std::string &¶m_path, + unsigned int conv_filt, + PadStrideInfo conv_pad_stride_info_1x1, + PadStrideInfo conv_pad_stride_info_3x3, + const std::pair<QuantizationInfo, QuantizationInfo> quant_info_1x1, + const std::pair<QuantizationInfo, QuantizationInfo> quant_info_3x3) { const std::string total_path = param_path + "_"; - SubStream sg(master_graph); - - sg << ConvolutionLayer( - 1, 1, conv_filt / 2, - get_weights_accessor(data_path, total_path + "1x1_w.npy"), - get_weights_accessor(data_path, total_path + "1x1_b.npy"), - conv_pad_stride_info_1x1, 1, quant_info_1x1.first, quant_info_1x1.second) - .set_name(total_path + "1x1/conv") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)).set_name(total_path + "1x1/conv/relu6"); - - sg << ConvolutionLayer( - 3, 3, conv_filt, - get_weights_accessor(data_path, total_path + "3x3_w.npy"), - get_weights_accessor(data_path, total_path + "3x3_b.npy"), - conv_pad_stride_info_3x3, 1, quant_info_3x3.first, quant_info_3x3.second) - .set_name(total_path + "3x3/conv") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)).set_name(total_path + "3x3/conv/relu6"); + SubStream sg(main_graph); + + sg << ConvolutionLayer(1, 1, conv_filt / 2, get_weights_accessor(data_path, total_path + "1x1_w.npy"), + get_weights_accessor(data_path, total_path + "1x1_b.npy"), conv_pad_stride_info_1x1, 1, + quant_info_1x1.first, quant_info_1x1.second) + .set_name(total_path + "1x1/conv") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)) + .set_name(total_path + "1x1/conv/relu6"); + + sg << ConvolutionLayer(3, 3, conv_filt, get_weights_accessor(data_path, total_path + "3x3_w.npy"), + get_weights_accessor(data_path, total_path + "3x3_b.npy"), conv_pad_stride_info_3x3, 1, + quant_info_3x3.first, quant_info_3x3.second) + .set_name(total_path + "3x3/conv") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)) + .set_name(total_path + "3x3/conv/relu6"); return ConcatLayer(std::move(sg)); } - ConcatLayer get_node_C_qasymm(IStream &master_graph, const std::string &data_path, std::string &¶m_path, - unsigned int conv_filt, PadStrideInfo conv_pad_stride_info, - const std::pair<QuantizationInfo, QuantizationInfo> quant_info, TensorShape reshape_shape) + ConcatLayer get_node_C_qasymm(IStream &main_graph, + const std::string &data_path, + std::string &¶m_path, + unsigned int conv_filt, + PadStrideInfo conv_pad_stride_info, + const std::pair<QuantizationInfo, QuantizationInfo> quant_info, + TensorShape reshape_shape) { const std::string total_path = param_path + "_"; - SubStream sg(master_graph); - sg << ConvolutionLayer( - 1U, 1U, conv_filt, - get_weights_accessor(data_path, total_path + "w.npy"), - get_weights_accessor(data_path, total_path + "b.npy"), - conv_pad_stride_info, 1, quant_info.first, quant_info.second) - .set_name(param_path + "/conv"); - if(common_params.data_layout == DataLayout::NCHW) + SubStream sg(main_graph); + sg << 
ConvolutionLayer(1U, 1U, conv_filt, get_weights_accessor(data_path, total_path + "w.npy"), + get_weights_accessor(data_path, total_path + "b.npy"), conv_pad_stride_info, 1, + quant_info.first, quant_info.second) + .set_name(param_path + "/conv"); + if (common_params.data_layout == DataLayout::NCHW) { sg << PermuteLayer(PermutationVector(2U, 0U, 1U), DataLayout::NHWC); } @@ -461,57 +503,59 @@ private: std::string data_path = common_params.data_path; // Add model path to data path - if(!data_path.empty()) + if (!data_path.empty()) { data_path += "/cnn_data/ssd_mobilenet_qasymm8_model/"; } // Quantization info are saved as pair for each (pointwise/depthwise) convolution layer: <weight_quant_info, output_quant_info> - const std::vector<std::pair<QuantizationInfo, QuantizationInfo>> conv_quant_info = - { - { QuantizationInfo(0.03624850884079933f, 163), QuantizationInfo(0.22219789028167725f, 113) }, // conv0 - { QuantizationInfo(0.0028752065263688564f, 113), QuantizationInfo(0.05433657020330429f, 128) }, // conv13_2_1_1 - { QuantizationInfo(0.0014862528769299388f, 125), QuantizationInfo(0.05037643015384674f, 131) }, // conv13_2_3_3 - { QuantizationInfo(0.00233650766313076f, 113), QuantizationInfo(0.04468846693634987f, 126) }, // conv13_3_1_1 - { QuantizationInfo(0.002501056529581547f, 120), QuantizationInfo(0.06026708707213402f, 111) }, // conv13_3_3_3 - { QuantizationInfo(0.002896666992455721f, 121), QuantizationInfo(0.037775348871946335f, 117) }, // conv13_4_1_1 - { QuantizationInfo(0.0023875406477600336f, 122), QuantizationInfo(0.03881589323282242f, 108) }, // conv13_4_3_3 - { QuantizationInfo(0.0022081052884459496f, 77), QuantizationInfo(0.025450613349676132f, 125) }, // conv13_5_1_1 - { QuantizationInfo(0.00604657270014286f, 121), QuantizationInfo(0.033533502370119095f, 109) } // conv13_5_3_3 + const std::vector<std::pair<QuantizationInfo, QuantizationInfo>> conv_quant_info = { + {QuantizationInfo(0.03624850884079933f, 163), QuantizationInfo(0.22219789028167725f, 113)}, // conv0 + {QuantizationInfo(0.0028752065263688564f, 113), + QuantizationInfo(0.05433657020330429f, 128)}, // conv13_2_1_1 + {QuantizationInfo(0.0014862528769299388f, 125), + QuantizationInfo(0.05037643015384674f, 131)}, // conv13_2_3_3 + {QuantizationInfo(0.00233650766313076f, 113), QuantizationInfo(0.04468846693634987f, 126)}, // conv13_3_1_1 + {QuantizationInfo(0.002501056529581547f, 120), QuantizationInfo(0.06026708707213402f, 111)}, // conv13_3_3_3 + {QuantizationInfo(0.002896666992455721f, 121), + QuantizationInfo(0.037775348871946335f, 117)}, // conv13_4_1_1 + {QuantizationInfo(0.0023875406477600336f, 122), + QuantizationInfo(0.03881589323282242f, 108)}, // conv13_4_3_3 + {QuantizationInfo(0.0022081052884459496f, 77), + QuantizationInfo(0.025450613349676132f, 125)}, // conv13_5_1_1 + {QuantizationInfo(0.00604657270014286f, 121), QuantizationInfo(0.033533502370119095f, 109)} // conv13_5_3_3 }; - const std::vector<std::pair<QuantizationInfo, QuantizationInfo>> depth_quant_info = - { - { QuantizationInfo(0.03408717364072f, 131), QuantizationInfo(0.29286590218544006f, 108) }, // dwsc1 - { QuantizationInfo(0.027518004179000854f, 107), QuantizationInfo(0.20796941220760345, 117) }, // dwsc2 - { QuantizationInfo(0.052489638328552246f, 85), QuantizationInfo(0.4303881824016571f, 142) }, // dwsc3 - { QuantizationInfo(0.016570359468460083f, 79), QuantizationInfo(0.10512150079011917f, 116) }, // dwsc4 - { QuantizationInfo(0.060739465057849884f, 65), QuantizationInfo(0.15331414341926575f, 94) }, // dwsc5 - { 
QuantizationInfo(0.01324534136801958f, 124), QuantizationInfo(0.13010895252227783f, 153) }, // dwsc6 - { QuantizationInfo(0.032326459884643555f, 124), QuantizationInfo(0.11565316468477249, 156) }, // dwsc7 - { QuantizationInfo(0.029948478564620018f, 155), QuantizationInfo(0.11413891613483429f, 146) }, // dwsc8 - { QuantizationInfo(0.028054025024175644f, 129), QuantizationInfo(0.1142905130982399f, 140) }, // dwsc9 - { QuantizationInfo(0.025204822421073914f, 129), QuantizationInfo(0.14668069779872894f, 149) }, // dwsc10 - { QuantizationInfo(0.019332280382514f, 110), QuantizationInfo(0.1480235457420349f, 91) }, // dwsc11 - { QuantizationInfo(0.0319712869822979f, 88), QuantizationInfo(0.10424695909023285f, 117) }, // dwsc12 - { QuantizationInfo(0.04378943517804146f, 164), QuantizationInfo(0.23176774382591248f, 138) } // dwsc13 + const std::vector<std::pair<QuantizationInfo, QuantizationInfo>> depth_quant_info = { + {QuantizationInfo(0.03408717364072f, 131), QuantizationInfo(0.29286590218544006f, 108)}, // dwsc1 + {QuantizationInfo(0.027518004179000854f, 107), QuantizationInfo(0.20796941220760345, 117)}, // dwsc2 + {QuantizationInfo(0.052489638328552246f, 85), QuantizationInfo(0.4303881824016571f, 142)}, // dwsc3 + {QuantizationInfo(0.016570359468460083f, 79), QuantizationInfo(0.10512150079011917f, 116)}, // dwsc4 + {QuantizationInfo(0.060739465057849884f, 65), QuantizationInfo(0.15331414341926575f, 94)}, // dwsc5 + {QuantizationInfo(0.01324534136801958f, 124), QuantizationInfo(0.13010895252227783f, 153)}, // dwsc6 + {QuantizationInfo(0.032326459884643555f, 124), QuantizationInfo(0.11565316468477249, 156)}, // dwsc7 + {QuantizationInfo(0.029948478564620018f, 155), QuantizationInfo(0.11413891613483429f, 146)}, // dwsc8 + {QuantizationInfo(0.028054025024175644f, 129), QuantizationInfo(0.1142905130982399f, 140)}, // dwsc9 + {QuantizationInfo(0.025204822421073914f, 129), QuantizationInfo(0.14668069779872894f, 149)}, // dwsc10 + {QuantizationInfo(0.019332280382514f, 110), QuantizationInfo(0.1480235457420349f, 91)}, // dwsc11 + {QuantizationInfo(0.0319712869822979f, 88), QuantizationInfo(0.10424695909023285f, 117)}, // dwsc12 + {QuantizationInfo(0.04378943517804146f, 164), QuantizationInfo(0.23176774382591248f, 138)} // dwsc13 }; - const std::vector<std::pair<QuantizationInfo, QuantizationInfo>> point_quant_info = - { - { QuantizationInfo(0.028777318075299263f, 144), QuantizationInfo(0.2663874328136444f, 121) }, // pw1 - { QuantizationInfo(0.015796702355146408f, 127), QuantizationInfo(0.1739964485168457f, 111) }, // pw2 - { QuantizationInfo(0.009349990636110306f, 127), QuantizationInfo(0.1805974692106247f, 104) }, // pw3 - { QuantizationInfo(0.012920888140797615f, 106), QuantizationInfo(0.1205204650759697f, 100) }, // pw4 - { QuantizationInfo(0.008119508624076843f, 145), QuantizationInfo(0.12272439152002335f, 97) }, // pw5 - { QuantizationInfo(0.0070041813887655735f, 115), QuantizationInfo(0.0947074219584465f, 101) }, // pw6 - { QuantizationInfo(0.004827278666198254f, 115), QuantizationInfo(0.0842885747551918f, 110) }, // pw7 - { QuantizationInfo(0.004755120258778334f, 128), QuantizationInfo(0.08283159881830215f, 116) }, // pw8 - { QuantizationInfo(0.007527193054556847f, 142), QuantizationInfo(0.12555131316184998f, 137) }, // pw9 - { QuantizationInfo(0.006050156895071268f, 109), QuantizationInfo(0.10871313512325287f, 124) }, // pw10 - { QuantizationInfo(0.00490700313821435f, 127), QuantizationInfo(0.10364262014627457f, 140) }, // pw11 - { QuantizationInfo(0.006063731852918863, 124), 
QuantizationInfo(0.11241862177848816f, 125) }, // pw12 - { QuantizationInfo(0.007901716977357864f, 139), QuantizationInfo(0.49889302253723145f, 141) } // pw13 + const std::vector<std::pair<QuantizationInfo, QuantizationInfo>> point_quant_info = { + {QuantizationInfo(0.028777318075299263f, 144), QuantizationInfo(0.2663874328136444f, 121)}, // pw1 + {QuantizationInfo(0.015796702355146408f, 127), QuantizationInfo(0.1739964485168457f, 111)}, // pw2 + {QuantizationInfo(0.009349990636110306f, 127), QuantizationInfo(0.1805974692106247f, 104)}, // pw3 + {QuantizationInfo(0.012920888140797615f, 106), QuantizationInfo(0.1205204650759697f, 100)}, // pw4 + {QuantizationInfo(0.008119508624076843f, 145), QuantizationInfo(0.12272439152002335f, 97)}, // pw5 + {QuantizationInfo(0.0070041813887655735f, 115), QuantizationInfo(0.0947074219584465f, 101)}, // pw6 + {QuantizationInfo(0.004827278666198254f, 115), QuantizationInfo(0.0842885747551918f, 110)}, // pw7 + {QuantizationInfo(0.004755120258778334f, 128), QuantizationInfo(0.08283159881830215f, 116)}, // pw8 + {QuantizationInfo(0.007527193054556847f, 142), QuantizationInfo(0.12555131316184998f, 137)}, // pw9 + {QuantizationInfo(0.006050156895071268f, 109), QuantizationInfo(0.10871313512325287f, 124)}, // pw10 + {QuantizationInfo(0.00490700313821435f, 127), QuantizationInfo(0.10364262014627457f, 140)}, // pw11 + {QuantizationInfo(0.006063731852918863, 124), QuantizationInfo(0.11241862177848816f, 125)}, // pw12 + {QuantizationInfo(0.007901716977357864f, 139), QuantizationInfo(0.49889302253723145f, 141)} // pw13 }; // Quantization info taken from the TfLite SSD MobileNet example @@ -519,114 +563,154 @@ private: // Create core graph graph << InputLayer(input_descriptor.set_quantization_info(in_quant_info), get_weights_accessor(data_path, common_params.image, DataLayout::NHWC)); - graph << ConvolutionLayer( - 3U, 3U, 32U, - get_weights_accessor(data_path, "conv0_w.npy"), - get_weights_accessor(data_path, "conv0_b.npy"), - PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::CEIL), 1, conv_quant_info.at(0).first, conv_quant_info.at(0).second) - .set_name("conv0"); - graph << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)).set_name("conv0/relu"); - graph << get_node_A_qasymm(graph, data_path, "conv1", 64U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(0), - point_quant_info.at(0)); - graph << get_node_A_qasymm(graph, data_path, "conv2", 128U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::CEIL), PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(1), - point_quant_info.at(1)); - graph << get_node_A_qasymm(graph, data_path, "conv3", 128U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(2), - point_quant_info.at(2)); - graph << get_node_A_qasymm(graph, data_path, "conv4", 256U, PadStrideInfo(2U, 2U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(3), - point_quant_info.at(3)); - graph << get_node_A_qasymm(graph, data_path, "conv5", 256U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(4), - point_quant_info.at(4)); - graph << get_node_A_qasymm(graph, data_path, "conv6", 512U, PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::CEIL), PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(5), - point_quant_info.at(5)); - graph << 
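// Each get_node_A_qasymm call appends one depthwise-separable block (a 3x3
// depthwise convolution followed by a 1x1 pointwise convolution, per the
// dwsc/pw naming above), consuming one pair from depth_quant_info and one
// from point_quant_info. A QASYMM8 QuantizationInfo(scale, offset) maps a
// stored integer q back to real = scale * (q - offset); a minimal sketch of
// that mapping, assuming arm_compute's QuantizationInfo::uniform():
//
//   float dequantize(uint8_t q, const QuantizationInfo &qi)
//   {
//       const UniformQuantizationInfo uqi = qi.uniform();
//       return uqi.scale * (static_cast<int>(q) - uqi.offset);
//   }
//
// e.g. the conv0 output pair QuantizationInfo(0.2222f, 113) places real 0.0
// at q = 113.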
get_node_A_qasymm(graph, data_path, "conv7", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(6), - point_quant_info.at(6)); - graph << get_node_A_qasymm(graph, data_path, "conv8", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(7), - point_quant_info.at(7)); - graph << get_node_A_qasymm(graph, data_path, "conv9", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(8), - point_quant_info.at(8)); - graph << get_node_A_qasymm(graph, data_path, "conv10", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(9), - point_quant_info.at(9)); - graph << get_node_A_qasymm(graph, data_path, "conv11", 512U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(10), - point_quant_info.at(10)); + graph << ConvolutionLayer(3U, 3U, 32U, get_weights_accessor(data_path, "conv0_w.npy"), + get_weights_accessor(data_path, "conv0_b.npy"), + PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::CEIL), 1, + conv_quant_info.at(0).first, conv_quant_info.at(0).second) + .set_name("conv0"); + graph << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::BOUNDED_RELU, 6.f)) + .set_name("conv0/relu"); + graph << get_node_A_qasymm(graph, data_path, "conv1", 64U, + PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), + PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(0), point_quant_info.at(0)); + graph << get_node_A_qasymm(graph, data_path, "conv2", 128U, + PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::CEIL), + PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(1), point_quant_info.at(1)); + graph << get_node_A_qasymm(graph, data_path, "conv3", 128U, + PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), + PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(2), point_quant_info.at(2)); + graph << get_node_A_qasymm(graph, data_path, "conv4", 256U, + PadStrideInfo(2U, 2U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), + PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(3), point_quant_info.at(3)); + graph << get_node_A_qasymm(graph, data_path, "conv5", 256U, + PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), + PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(4), point_quant_info.at(4)); + graph << get_node_A_qasymm(graph, data_path, "conv6", 512U, + PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::CEIL), + PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(5), point_quant_info.at(5)); + graph << get_node_A_qasymm(graph, data_path, "conv7", 512U, + PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), + PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(6), point_quant_info.at(6)); + graph << get_node_A_qasymm(graph, data_path, "conv8", 512U, + PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), + PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(7), point_quant_info.at(7)); + graph << get_node_A_qasymm(graph, data_path, "conv9", 512U, + PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), + PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(8), point_quant_info.at(8)); + graph << get_node_A_qasymm(graph, data_path, "conv10", 512U, + PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), + 
PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(9), point_quant_info.at(9)); + graph << get_node_A_qasymm(graph, data_path, "conv11", 512U, + PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), + PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(10), point_quant_info.at(10)); SubStream conv_13(graph); - conv_13 << get_node_A_qasymm(graph, data_path, "conv12", 1024U, PadStrideInfo(2U, 2U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(11), - point_quant_info.at(11)); - conv_13 << get_node_A_qasymm(conv_13, data_path, "conv13", 1024U, PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(12), - point_quant_info.at(12)); + conv_13 << get_node_A_qasymm(graph, data_path, "conv12", 1024U, + PadStrideInfo(2U, 2U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), + PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(11), point_quant_info.at(11)); + conv_13 << get_node_A_qasymm(conv_13, data_path, "conv13", 1024U, + PadStrideInfo(1U, 1U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), + PadStrideInfo(1U, 1U, 0U, 0U), depth_quant_info.at(12), point_quant_info.at(12)); SubStream conv_14(conv_13); - conv_14 << get_node_B_qasymm(conv_13, data_path, "conv13_2", 512U, PadStrideInfo(1U, 1U, 0U, 0U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::CEIL), conv_quant_info.at(1), - conv_quant_info.at(2)); + conv_14 << get_node_B_qasymm(conv_13, data_path, "conv13_2", 512U, PadStrideInfo(1U, 1U, 0U, 0U), + PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::CEIL), + conv_quant_info.at(1), conv_quant_info.at(2)); SubStream conv_15(conv_14); - conv_15 << get_node_B_qasymm(conv_14, data_path, "conv13_3", 256U, PadStrideInfo(1U, 1U, 0U, 0U), PadStrideInfo(2U, 2U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), conv_quant_info.at(3), - conv_quant_info.at(4)); + conv_15 << get_node_B_qasymm(conv_14, data_path, "conv13_3", 256U, PadStrideInfo(1U, 1U, 0U, 0U), + PadStrideInfo(2U, 2U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), + conv_quant_info.at(3), conv_quant_info.at(4)); SubStream conv_16(conv_15); - conv_16 << get_node_B_qasymm(conv_15, data_path, "conv13_4", 256U, PadStrideInfo(1U, 1U, 0U, 0U), PadStrideInfo(2U, 2U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), conv_quant_info.at(5), - conv_quant_info.at(6)); + conv_16 << get_node_B_qasymm(conv_15, data_path, "conv13_4", 256U, PadStrideInfo(1U, 1U, 0U, 0U), + PadStrideInfo(2U, 2U, 1U, 1U, 1U, 1U, DimensionRoundingType::CEIL), + conv_quant_info.at(5), conv_quant_info.at(6)); SubStream conv_17(conv_16); - conv_17 << get_node_B_qasymm(conv_16, data_path, "conv13_5", 128U, PadStrideInfo(1U, 1U, 0U, 0U), PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::CEIL), conv_quant_info.at(7), - conv_quant_info.at(8)); + conv_17 << get_node_B_qasymm(conv_16, data_path, "conv13_5", 128U, PadStrideInfo(1U, 1U, 0U, 0U), + PadStrideInfo(2U, 2U, 0U, 1U, 0U, 1U, DimensionRoundingType::CEIL), + conv_quant_info.at(7), conv_quant_info.at(8)); // box_predictor - const std::vector<std::pair<QuantizationInfo, QuantizationInfo>> box_enc_pred_quant_info = - { - { QuantizationInfo(0.005202020984143019f, 136), QuantizationInfo(0.08655580133199692f, 183) }, // boxpredictor0_bep - { QuantizationInfo(0.003121797926723957f, 132), QuantizationInfo(0.03218776360154152f, 140) }, // boxpredictor1_bep - { QuantizationInfo(0.002995674265548587f, 130), QuantizationInfo(0.029072262346744537f, 125) }, // boxpredictor2_bep - { 
QuantizationInfo(0.0023131705820560455f, 130), QuantizationInfo(0.026488754898309708f, 127) }, // boxpredictor3_bep - { QuantizationInfo(0.0013905081432312727f, 132), QuantizationInfo(0.0199890099465847f, 137) }, // boxpredictor4_bep - { QuantizationInfo(0.00216794665902853f, 121), QuantizationInfo(0.019798893481492996f, 151) } // boxpredictor5_bep + const std::vector<std::pair<QuantizationInfo, QuantizationInfo>> box_enc_pred_quant_info = { + {QuantizationInfo(0.005202020984143019f, 136), + QuantizationInfo(0.08655580133199692f, 183)}, // boxpredictor0_bep + {QuantizationInfo(0.003121797926723957f, 132), + QuantizationInfo(0.03218776360154152f, 140)}, // boxpredictor1_bep + {QuantizationInfo(0.002995674265548587f, 130), + QuantizationInfo(0.029072262346744537f, 125)}, // boxpredictor2_bep + {QuantizationInfo(0.0023131705820560455f, 130), + QuantizationInfo(0.026488754898309708f, 127)}, // boxpredictor3_bep + {QuantizationInfo(0.0013905081432312727f, 132), + QuantizationInfo(0.0199890099465847f, 137)}, // boxpredictor4_bep + {QuantizationInfo(0.00216794665902853f, 121), + QuantizationInfo(0.019798893481492996f, 151)} // boxpredictor5_bep }; const std::vector<TensorShape> box_reshape = // NHWC - { - TensorShape(4U, 1U, 1083U), // boxpredictor0_bep_reshape - TensorShape(4U, 1U, 600U), // boxpredictor1_bep_reshape - TensorShape(4U, 1U, 150U), // boxpredictor2_bep_reshape - TensorShape(4U, 1U, 54U), // boxpredictor3_bep_reshape - TensorShape(4U, 1U, 24U), // boxpredictor4_bep_reshape - TensorShape(4U, 1U, 6U) // boxpredictor5_bep_reshape - }; + { + TensorShape(4U, 1U, 1083U), // boxpredictor0_bep_reshape + TensorShape(4U, 1U, 600U), // boxpredictor1_bep_reshape + TensorShape(4U, 1U, 150U), // boxpredictor2_bep_reshape + TensorShape(4U, 1U, 54U), // boxpredictor3_bep_reshape + TensorShape(4U, 1U, 24U), // boxpredictor4_bep_reshape + TensorShape(4U, 1U, 6U) // boxpredictor5_bep_reshape + }; SubStream conv_11_box_enc_pre(graph); - conv_11_box_enc_pre << get_node_C_qasymm(graph, data_path, "BoxPredictor_0_BEP", 12U, PadStrideInfo(1U, 1U, 0U, 0U), box_enc_pred_quant_info.at(0), box_reshape.at(0)); + conv_11_box_enc_pre << get_node_C_qasymm(graph, data_path, "BoxPredictor_0_BEP", 12U, + PadStrideInfo(1U, 1U, 0U, 0U), box_enc_pred_quant_info.at(0), + box_reshape.at(0)); SubStream conv_13_box_enc_pre(conv_13); - conv_13_box_enc_pre << get_node_C_qasymm(conv_13, data_path, "BoxPredictor_1_BEP", 24U, PadStrideInfo(1U, 1U, 0U, 0U), box_enc_pred_quant_info.at(1), box_reshape.at(1)); + conv_13_box_enc_pre << get_node_C_qasymm(conv_13, data_path, "BoxPredictor_1_BEP", 24U, + PadStrideInfo(1U, 1U, 0U, 0U), box_enc_pred_quant_info.at(1), + box_reshape.at(1)); SubStream conv_14_2_box_enc_pre(conv_14); - conv_14_2_box_enc_pre << get_node_C_qasymm(conv_14, data_path, "BoxPredictor_2_BEP", 24U, PadStrideInfo(1U, 1U, 0U, 0U), box_enc_pred_quant_info.at(2), box_reshape.at(2)); + conv_14_2_box_enc_pre << get_node_C_qasymm(conv_14, data_path, "BoxPredictor_2_BEP", 24U, + PadStrideInfo(1U, 1U, 0U, 0U), box_enc_pred_quant_info.at(2), + box_reshape.at(2)); SubStream conv_15_2_box_enc_pre(conv_15); - conv_15_2_box_enc_pre << get_node_C_qasymm(conv_15, data_path, "BoxPredictor_3_BEP", 24U, PadStrideInfo(1U, 1U, 0U, 0U), box_enc_pred_quant_info.at(3), box_reshape.at(3)); + conv_15_2_box_enc_pre << get_node_C_qasymm(conv_15, data_path, "BoxPredictor_3_BEP", 24U, + PadStrideInfo(1U, 1U, 0U, 0U), box_enc_pred_quant_info.at(3), + box_reshape.at(3)); SubStream conv_16_2_box_enc_pre(conv_16); - conv_16_2_box_enc_pre << 
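// Box encodings are predicted from six feature maps of decreasing resolution.
// Each head flattens to 4 coordinates per anchor, and the box_reshape shapes
// above give 1083 + 600 + 150 + 54 + 24 + 6 = 1917 anchors in total, matching
// the ReshapeLayer(TensorShape(4U, 1917U)) applied after the concatenation
// below.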
get_node_C_qasymm(conv_16, data_path, "BoxPredictor_4_BEP", 24U, PadStrideInfo(1U, 1U, 0U, 0U), box_enc_pred_quant_info.at(4), box_reshape.at(4)); + conv_16_2_box_enc_pre << get_node_C_qasymm(conv_16, data_path, "BoxPredictor_4_BEP", 24U, + PadStrideInfo(1U, 1U, 0U, 0U), box_enc_pred_quant_info.at(4), + box_reshape.at(4)); SubStream conv_17_2_box_enc_pre(conv_17); - conv_17_2_box_enc_pre << get_node_C_qasymm(conv_17, data_path, "BoxPredictor_5_BEP", 24U, PadStrideInfo(1U, 1U, 0U, 0U), box_enc_pred_quant_info.at(5), box_reshape.at(5)); + conv_17_2_box_enc_pre << get_node_C_qasymm(conv_17, data_path, "BoxPredictor_5_BEP", 24U, + PadStrideInfo(1U, 1U, 0U, 0U), box_enc_pred_quant_info.at(5), + box_reshape.at(5)); SubStream box_enc_pre(graph); const QuantizationInfo bep_concate_qinfo = QuantizationInfo(0.08655580133199692f, 183); - box_enc_pre << ConcatLayer(arm_compute::graph::descriptors::ConcatLayerDescriptor(DataLayoutDimension::HEIGHT, bep_concate_qinfo), - std::move(conv_11_box_enc_pre), std::move(conv_13_box_enc_pre), conv_14_2_box_enc_pre, std::move(conv_15_2_box_enc_pre), + box_enc_pre << ConcatLayer(arm_compute::graph::descriptors::ConcatLayerDescriptor(DataLayoutDimension::HEIGHT, + bep_concate_qinfo), + std::move(conv_11_box_enc_pre), std::move(conv_13_box_enc_pre), + conv_14_2_box_enc_pre, std::move(conv_15_2_box_enc_pre), std::move(conv_16_2_box_enc_pre), std::move(conv_17_2_box_enc_pre)) - .set_name("BoxPredictor/concat"); + .set_name("BoxPredictor/concat"); box_enc_pre << ReshapeLayer(TensorShape(4U, 1917U)).set_name("BoxPredictor/reshape"); // class_predictor - const std::vector<std::pair<QuantizationInfo, QuantizationInfo>> class_pred_quant_info = - { - { QuantizationInfo(0.002744135679677129f, 125), QuantizationInfo(0.05746262148022652f, 234) }, // boxpredictor0_cp - { QuantizationInfo(0.0024326108396053314f, 80), QuantizationInfo(0.03764628246426582f, 217) }, // boxpredictor1_cp - { QuantizationInfo(0.0013898586621508002f, 141), QuantizationInfo(0.034081317484378815f, 214) }, // boxpredictor2_cp - { QuantizationInfo(0.0014176908880472183f, 133), QuantizationInfo(0.033889178186655045f, 215) }, // boxpredictor3_cp - { QuantizationInfo(0.001090311910957098f, 125), QuantizationInfo(0.02646234817802906f, 230) }, // boxpredictor4_cp - { QuantizationInfo(0.001134163816459477f, 115), QuantizationInfo(0.026926767081022263f, 218) } // boxpredictor5_cp + const std::vector<std::pair<QuantizationInfo, QuantizationInfo>> class_pred_quant_info = { + {QuantizationInfo(0.002744135679677129f, 125), + QuantizationInfo(0.05746262148022652f, 234)}, // boxpredictor0_cp + {QuantizationInfo(0.0024326108396053314f, 80), + QuantizationInfo(0.03764628246426582f, 217)}, // boxpredictor1_cp + {QuantizationInfo(0.0013898586621508002f, 141), + QuantizationInfo(0.034081317484378815f, 214)}, // boxpredictor2_cp + {QuantizationInfo(0.0014176908880472183f, 133), + QuantizationInfo(0.033889178186655045f, 215)}, // boxpredictor3_cp + {QuantizationInfo(0.001090311910957098f, 125), + QuantizationInfo(0.02646234817802906f, 230)}, // boxpredictor4_cp + {QuantizationInfo(0.001134163816459477f, 115), + QuantizationInfo(0.026926767081022263f, 218)} // boxpredictor5_cp }; - const std::vector<TensorShape> class_reshape = - { + const std::vector<TensorShape> class_reshape = { TensorShape(91U, 1083U), // boxpredictor0_cp_reshape TensorShape(91U, 600U), // boxpredictor1_cp_reshape TensorShape(91U, 150U), // boxpredictor2_cp_reshape @@ -636,60 +720,81 @@ private: }; SubStream conv_11_class_pre(graph); - conv_11_class_pre << 
get_node_C_qasymm(graph, data_path, "BoxPredictor_0_CP", 273U, PadStrideInfo(1U, 1U, 0U, 0U), class_pred_quant_info.at(0), class_reshape.at(0)); + conv_11_class_pre << get_node_C_qasymm(graph, data_path, "BoxPredictor_0_CP", 273U, + PadStrideInfo(1U, 1U, 0U, 0U), class_pred_quant_info.at(0), + class_reshape.at(0)); SubStream conv_13_class_pre(conv_13); - conv_13_class_pre << get_node_C_qasymm(conv_13, data_path, "BoxPredictor_1_CP", 546U, PadStrideInfo(1U, 1U, 0U, 0U), class_pred_quant_info.at(1), class_reshape.at(1)); + conv_13_class_pre << get_node_C_qasymm(conv_13, data_path, "BoxPredictor_1_CP", 546U, + PadStrideInfo(1U, 1U, 0U, 0U), class_pred_quant_info.at(1), + class_reshape.at(1)); SubStream conv_14_2_class_pre(conv_14); - conv_14_2_class_pre << get_node_C_qasymm(conv_14, data_path, "BoxPredictor_2_CP", 546U, PadStrideInfo(1U, 1U, 0U, 0U), class_pred_quant_info.at(2), class_reshape.at(2)); + conv_14_2_class_pre << get_node_C_qasymm(conv_14, data_path, "BoxPredictor_2_CP", 546U, + PadStrideInfo(1U, 1U, 0U, 0U), class_pred_quant_info.at(2), + class_reshape.at(2)); SubStream conv_15_2_class_pre(conv_15); - conv_15_2_class_pre << get_node_C_qasymm(conv_15, data_path, "BoxPredictor_3_CP", 546U, PadStrideInfo(1U, 1U, 0U, 0U), class_pred_quant_info.at(3), class_reshape.at(3)); + conv_15_2_class_pre << get_node_C_qasymm(conv_15, data_path, "BoxPredictor_3_CP", 546U, + PadStrideInfo(1U, 1U, 0U, 0U), class_pred_quant_info.at(3), + class_reshape.at(3)); SubStream conv_16_2_class_pre(conv_16); - conv_16_2_class_pre << get_node_C_qasymm(conv_16, data_path, "BoxPredictor_4_CP", 546U, PadStrideInfo(1U, 1U, 0U, 0U), class_pred_quant_info.at(4), class_reshape.at(4)); + conv_16_2_class_pre << get_node_C_qasymm(conv_16, data_path, "BoxPredictor_4_CP", 546U, + PadStrideInfo(1U, 1U, 0U, 0U), class_pred_quant_info.at(4), + class_reshape.at(4)); SubStream conv_17_2_class_pre(conv_17); - conv_17_2_class_pre << get_node_C_qasymm(conv_17, data_path, "BoxPredictor_5_CP", 546U, PadStrideInfo(1U, 1U, 0U, 0U), class_pred_quant_info.at(5), class_reshape.at(5)); + conv_17_2_class_pre << get_node_C_qasymm(conv_17, data_path, "BoxPredictor_5_CP", 546U, + PadStrideInfo(1U, 1U, 0U, 0U), class_pred_quant_info.at(5), + class_reshape.at(5)); const QuantizationInfo cp_concate_qinfo = QuantizationInfo(0.0584389753639698f, 230); SubStream class_pred(graph); - class_pred << ConcatLayer( - arm_compute::graph::descriptors::ConcatLayerDescriptor(DataLayoutDimension::WIDTH, cp_concate_qinfo), - std::move(conv_11_class_pre), std::move(conv_13_class_pre), std::move(conv_14_2_class_pre), - std::move(conv_15_2_class_pre), std::move(conv_16_2_class_pre), std::move(conv_17_2_class_pre)) - .set_name("ClassPrediction/concat"); - - const QuantizationInfo logistic_out_qinfo = QuantizationInfo(0.00390625f, 0); - class_pred << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC), logistic_out_qinfo).set_name("ClassPrediction/logistic"); - - const int max_detections = 10; - const int max_classes_per_detection = 1; - const float nms_score_threshold = 0.30000001192092896f; - const float nms_iou_threshold = 0.6000000238418579f; - const int num_classes = 90; - const float x_scale = 10.f; - const float y_scale = 10.f; - const float h_scale = 5.f; - const float w_scale = 5.f; - std::array<float, 4> scales = { y_scale, x_scale, w_scale, h_scale }; - const QuantizationInfo anchors_qinfo = QuantizationInfo(0.006453060545027256f, 0); + class_pred << 
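// The six class-prediction heads (91 scores per anchor: 90 classes plus
// background) are concatenated and squashed with a LOGISTIC activation. The
// old code pinned the logistic output scale to 1/256 = 0.00390625 with a zero
// offset so that [0, 1) probabilities span the full QASYMM8 range; the new
// code derives the offset from the quantized data type's minimum value, which
// keeps the same mapping valid for QASYMM8_SIGNED.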
ConcatLayer(arm_compute::graph::descriptors::ConcatLayerDescriptor(DataLayoutDimension::WIDTH, + cp_concate_qinfo), + std::move(conv_11_class_pre), std::move(conv_13_class_pre), + std::move(conv_14_2_class_pre), std::move(conv_15_2_class_pre), + std::move(conv_16_2_class_pre), std::move(conv_17_2_class_pre)) + .set_name("ClassPrediction/concat"); + + const QuantizationInfo logistic_out_qinfo = QuantizationInfo( + 0.00390625f, quantization::get_min_max_values_from_quantized_data_type(common_params.data_type).first); + class_pred << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC), + logistic_out_qinfo) + .set_name("ClassPrediction/logistic"); + + const int max_detections = 10; + const int max_classes_per_detection = 1; + const float nms_score_threshold = 0.30000001192092896f; + const float nms_iou_threshold = 0.6000000238418579f; + const int num_classes = 90; + const float x_scale = 10.f; + const float y_scale = 10.f; + const float h_scale = 5.f; + const float w_scale = 5.f; + std::array<float, 4> scales = {y_scale, x_scale, w_scale, h_scale}; + const QuantizationInfo anchors_qinfo = QuantizationInfo(0.006453060545027256f, 0); SubStream detection_ouput(box_enc_pre); detection_ouput << DetectionPostProcessLayer(std::move(class_pred), - DetectionPostProcessLayerInfo(max_detections, max_classes_per_detection, nms_score_threshold, nms_iou_threshold, num_classes, scales), + DetectionPostProcessLayerInfo( + max_detections, max_classes_per_detection, nms_score_threshold, + nms_iou_threshold, num_classes, scales), get_weights_accessor(data_path, "anchors.npy"), anchors_qinfo) - .set_name("DetectionPostProcess"); + .set_name("DetectionPostProcess"); SubStream ouput_0(detection_ouput); - ouput_0 << OutputLayer(get_npy_output_accessor(detection_boxes_opt->value(), TensorShape(4U, 10U), DataType::F32), 0); + ouput_0 << OutputLayer( + get_npy_output_accessor(detection_boxes_opt->value(), TensorShape(4U, 10U), DataType::F32), 0); SubStream ouput_1(detection_ouput); - ouput_1 << OutputLayer(get_npy_output_accessor(detection_classes_opt->value(), TensorShape(10U), DataType::F32), 1); + ouput_1 << OutputLayer(get_npy_output_accessor(detection_classes_opt->value(), TensorShape(10U), DataType::F32), + 1); SubStream ouput_2(detection_ouput); - ouput_2 << OutputLayer(get_npy_output_accessor(detection_scores_opt->value(), TensorShape(10U), DataType::F32), 2); + ouput_2 << OutputLayer(get_npy_output_accessor(detection_scores_opt->value(), TensorShape(10U), DataType::F32), + 2); SubStream ouput_3(detection_ouput); ouput_3 << OutputLayer(get_npy_output_accessor(num_detections_opt->value(), TensorShape(1U), DataType::F32), 3); diff --git a/examples/graph_vgg16.cpp b/examples/graph_vgg16.cpp index f6996dadd5..72ac9694b1 100644 --- a/examples/graph_vgg16.cpp +++ b/examples/graph_vgg16.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. 
*/ #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -35,22 +36,11 @@ using namespace arm_compute::graph_utils; class GraphVGG16Example : public Example { public: - GraphVGG16Example() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "VGG16") + GraphVGG16Example() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "VGG16") { } bool do_setup(int argc, char **argv) override { - // Check if the system has enough RAM to run the example, systems with less than 2GB have - // to hint the API to minimize memory consumption otherwise it'll run out of memory and - // fail throwing the bad_alloc exception - arm_compute::MEMInfo meminfo; - const size_t mem_total = meminfo.get_total_in_kb(); - if(mem_total <= arm_compute::MEMInfo::TWO_GB_IN_KB) - { - arm_compute::MEMInfo::set_policy(arm_compute::MemoryPolicy::MINIMIZE); - } - // Parse arguments cmd_parser.parse(argc, argv); cmd_parser.validate(); @@ -59,7 +49,7 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; @@ -72,161 +62,153 @@ public: std::string data_path = common_params.data_path; // Create a preprocessor object - const std::array<float, 3> mean_rgb{ { 123.68f, 116.779f, 103.939f } }; - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb); + const std::array<float, 3> mean_rgb{{123.68f, 116.779f, 103.939f}}; + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<CaffePreproccessor>(mean_rgb); // Create input descriptor const auto operation_layout = common_params.data_layout; - const TensorShape tensor_shape = permute_shape(TensorShape(224U, 224U, 3U, 1U), DataLayout::NCHW, operation_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(224U, 224U, 3U, common_params.batches), DataLayout::NCHW, operation_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NCHW; // Create graph - graph << common_params.target - << common_params.fast_math_hint - << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor))) - // Layer 1 - << ConvolutionLayer( - 3U, 3U, 64U, - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_1_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv1_1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1_1/Relu") - // Layer 2 - << ConvolutionLayer( - 3U, 3U, 64U, - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_2_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_2_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv1_2") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1_2/Relu") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))).set_name("pool1") - // Layer 3 - << ConvolutionLayer( - 3U, 3U, 128U, - get_weights_accessor(data_path, 
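// get_weights_accessor streams each layer's weights from a NumPy .npy file
// under data_path, declaring the trained layout (NCHW) so the graph can
// convert them when it executes in NHWC. Note the setup changes above: the
// explicit MEMInfo / MemoryPolicy::MINIMIZE hint for systems with <= 2 GB of
// RAM is gone, and the input batch size now comes from common_params.batches
// instead of a hard-coded 1.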
"/cnn_data/vgg16_model/conv2_1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv2_1_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv2_1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv2_1/Relu") - // Layer 4 - << ConvolutionLayer( - 3U, 3U, 128U, - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv2_2_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv2_2_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv2_2") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv2_2/Relu") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))).set_name("pool2") - // Layer 5 - << ConvolutionLayer( - 3U, 3U, 256U, - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_1_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv3_1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv3_1/Relu") - // Layer 6 - << ConvolutionLayer( - 3U, 3U, 256U, - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_2_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_2_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv3_2") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv3_2/Relu") - // Layer 7 - << ConvolutionLayer( - 3U, 3U, 256U, - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_3_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_3_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv3_3") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv3_3/Relu") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))).set_name("pool3") - // Layer 8 - << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_1_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv4_1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv4_1/Relu") - // Layer 9 - << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_2_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_2_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv4_2") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv4_2/Relu") - // Layer 10 - << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_3_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_3_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv4_3") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv4_3/Relu") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))).set_name("pool4") - // Layer 11 - << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_1_b.npy"), - 
PadStrideInfo(1, 1, 1, 1)) - .set_name("conv5_1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv5_1/Relu") - // Layer 12 - << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_2_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_2_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv5_2") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv5_2/Relu") - // Layer 13 - << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_3_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_3_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv5_3") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv5_3/Relu") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))).set_name("pool5") - // Layer 14 - << FullyConnectedLayer( - 4096U, - get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc6_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc6_b.npy")) - .set_name("fc6") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Relu") - // Layer 15 - << FullyConnectedLayer( - 4096U, - get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc7_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc7_b.npy")) - .set_name("fc7") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Relu_1") - // Layer 16 - << FullyConnectedLayer( - 1000U, - get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc8_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc8_b.npy")) - .set_name("fc8") - // Softmax - << SoftmaxLayer().set_name("prob") - << OutputLayer(get_output_accessor(common_params, 5)); + graph + << common_params.target << common_params.fast_math_hint + << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor))) + // Layer 1 + << ConvolutionLayer( + 3U, 3U, 64U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_1_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv1_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv1_1/Relu") + // Layer 2 + << ConvolutionLayer( + 3U, 3U, 64U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_2_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv1_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv1_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv1_2/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))) + .set_name("pool1") + // Layer 3 + << ConvolutionLayer( + 3U, 3U, 128U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv2_1_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv2_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv2_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv2_1/Relu") + // Layer 4 + << ConvolutionLayer( + 3U, 3U, 128U, get_weights_accessor(data_path, 
"/cnn_data/vgg16_model/conv2_2_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv2_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv2_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv2_2/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))) + .set_name("pool2") + // Layer 5 + << ConvolutionLayer( + 3U, 3U, 256U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_1_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv3_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv3_1/Relu") + // Layer 6 + << ConvolutionLayer( + 3U, 3U, 256U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_2_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv3_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv3_2/Relu") + // Layer 7 + << ConvolutionLayer( + 3U, 3U, 256U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_3_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv3_3_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv3_3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv3_3/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))) + .set_name("pool3") + // Layer 8 + << ConvolutionLayer( + 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_1_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv4_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv4_1/Relu") + // Layer 9 + << ConvolutionLayer( + 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_2_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv4_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv4_2/Relu") + // Layer 10 + << ConvolutionLayer( + 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_3_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv4_3_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv4_3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv4_3/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))) + .set_name("pool4") + // Layer 11 + << ConvolutionLayer( + 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_1_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv5_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv5_1/Relu") + // Layer 12 + << ConvolutionLayer( + 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_2_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_2_b.npy"), 
PadStrideInfo(1, 1, 1, 1)) + .set_name("conv5_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv5_2/Relu") + // Layer 13 + << ConvolutionLayer( + 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_3_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg16_model/conv5_3_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv5_3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv5_3/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))) + .set_name("pool5") + // Layer 14 + << FullyConnectedLayer(4096U, + get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc6_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc6_b.npy")) + .set_name("fc6") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Relu") + // Layer 15 + << FullyConnectedLayer(4096U, + get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc7_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc7_b.npy")) + .set_name("fc7") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Relu_1") + // Layer 16 + << FullyConnectedLayer(1000U, + get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc8_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg16_model/fc8_b.npy")) + .set_name("fc8") + // Softmax + << SoftmaxLayer().set_name("prob") << OutputLayer(get_output_accessor(common_params, 5)); // Finalize graph GraphConfig config; - config.num_threads = common_params.threads; - config.use_tuner = common_params.enable_tuner; - config.tuner_mode = common_params.tuner_mode; - config.tuner_file = common_params.tuner_file; - config.convert_to_uint8 = (common_params.data_type == DataType::QASYMM8); + config.num_threads = common_params.threads; + config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; + config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; + config.use_synthetic_type = arm_compute::is_data_type_quantized(common_params.data_type); + config.synthetic_type = common_params.data_type; graph.finalize(common_params.target, config); diff --git a/examples/graph_vgg19.cpp b/examples/graph_vgg19.cpp index f9f5c213d5..9293544655 100644 --- a/examples/graph_vgg19.cpp +++ b/examples/graph_vgg19.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 ARM Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. 
*/ #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -34,22 +35,11 @@ using namespace arm_compute::graph_utils; class GraphVGG19Example : public Example { public: - GraphVGG19Example() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "VGG19") + GraphVGG19Example() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "VGG19") { } bool do_setup(int argc, char **argv) override { - // Check if the system has enough RAM to run the example, systems with less than 2GB have - // to hint the API to minimize memory consumption otherwise it'll run out of memory and - // fail throwing the bad_alloc exception - arm_compute::MEMInfo meminfo; - const size_t mem_total = meminfo.get_total_in_kb(); - if(mem_total <= arm_compute::MEMInfo::TWO_GB_IN_KB) - { - arm_compute::MEMInfo::set_policy(arm_compute::MemoryPolicy::MINIMIZE); - } - // Parse arguments cmd_parser.parse(argc, argv); cmd_parser.validate(); @@ -58,7 +48,7 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; @@ -71,173 +61,162 @@ public: std::string data_path = common_params.data_path; // Create a preprocessor object - const std::array<float, 3> mean_rgb{ { 123.68f, 116.779f, 103.939f } }; - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<CaffePreproccessor>(mean_rgb); + const std::array<float, 3> mean_rgb{{123.68f, 116.779f, 103.939f}}; + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<CaffePreproccessor>(mean_rgb); // Create input descriptor const auto operation_layout = common_params.data_layout; - const TensorShape tensor_shape = permute_shape(TensorShape(224U, 224U, 3U, 1U), DataLayout::NCHW, operation_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(224U, 224U, 3U, common_params.batches), DataLayout::NCHW, operation_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(operation_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NCHW; - graph << common_params.target - << common_params.fast_math_hint - << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor))) - // Layer 1 - << ConvolutionLayer( - 3U, 3U, 64U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv1_1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv1_1_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv1_1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1_1/Relu") - << ConvolutionLayer( - 3U, 3U, 64U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv1_2_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv1_2_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv1_2") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv1_2/Relu") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))).set_name("pool1") - // Layer 2 - << ConvolutionLayer( - 3U, 3U, 128U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv2_1_w.npy", 
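// As in the VGG16 example, CaffePreproccessor subtracts the Caffe mean image
// (123.68, 116.779, 103.939) per RGB channel, and permute_shape converts the
// canonical NCHW 224x224x3 input shape to the data layout requested on the
// command line, with common_params.batches now setting the batch dimension.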
weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv2_1_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv2_1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv2_1/Relu") - << ConvolutionLayer( - 3U, 3U, 128U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv2_2_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv2_2_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv2_2") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv2_2/Relu") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))).set_name("pool2") - // Layer 3 - << ConvolutionLayer( - 3U, 3U, 256U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_1_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv3_1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv3_1/Relu") - << ConvolutionLayer( - 3U, 3U, 256U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_2_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_2_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv3_2") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv3_2/Relu") - << ConvolutionLayer( - 3U, 3U, 256U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_3_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_3_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv3_3") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv3_3/Relu") - << ConvolutionLayer( - 3U, 3U, 256U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_4_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_4_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv3_4") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv3_4/Relu") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))).set_name("pool3") - // Layer 4 - << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_1_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv4_1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv4_1/Relu") - << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_2_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_2_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv4_2") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv4_2/Relu") - << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_3_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_3_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv4_3") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv4_3/Relu") - << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, 
"/cnn_data/vgg19_model/conv4_4_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_4_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv4_4") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv4_4/Relu") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))).set_name("pool4") - // Layer 5 - << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_1_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_1_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv5_1") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv5_1/Relu") - << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_2_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_2_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv5_2") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv5_2/Relu") - << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_3_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_3_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv5_3") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv5_3/Relu") - << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_4_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_4_b.npy"), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv5_4") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv5_4/Relu") - << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))).set_name("pool5") - // Layer 6 - << FullyConnectedLayer( - 4096U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc6_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc6_b.npy")) - .set_name("fc6") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Relu") - // Layer 7 - << FullyConnectedLayer( - 4096U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc7_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc7_b.npy")) - .set_name("fc7") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Relu_1") - // Layer 8 - << FullyConnectedLayer( - 1000U, - get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc8_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc8_b.npy")) - .set_name("fc8") - // Softmax - << SoftmaxLayer().set_name("prob") - << OutputLayer(get_output_accessor(common_params, 5)); + graph + << common_params.target << common_params.fast_math_hint + << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor))) + // Layer 1 + << ConvolutionLayer( + 3U, 3U, 64U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv1_1_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv1_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv1_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv1_1/Relu") 
+ << ConvolutionLayer( + 3U, 3U, 64U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv1_2_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv1_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv1_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv1_2/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))) + .set_name("pool1") + // Layer 2 + << ConvolutionLayer( + 3U, 3U, 128U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv2_1_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv2_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv2_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv2_1/Relu") + << ConvolutionLayer( + 3U, 3U, 128U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv2_2_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv2_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv2_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv2_2/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))) + .set_name("pool2") + // Layer 3 + << ConvolutionLayer( + 3U, 3U, 256U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_1_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv3_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv3_1/Relu") + << ConvolutionLayer( + 3U, 3U, 256U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_2_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv3_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv3_2/Relu") + << ConvolutionLayer( + 3U, 3U, 256U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_3_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_3_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv3_3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv3_3/Relu") + << ConvolutionLayer( + 3U, 3U, 256U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_4_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv3_4_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv3_4") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv3_4/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))) + .set_name("pool3") + // Layer 4 + << ConvolutionLayer( + 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_1_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv4_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv4_1/Relu") + << ConvolutionLayer( + 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_2_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_2_b.npy"), 
PadStrideInfo(1, 1, 1, 1)) + .set_name("conv4_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv4_2/Relu") + << ConvolutionLayer( + 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_3_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_3_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv4_3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv4_3/Relu") + << ConvolutionLayer( + 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_4_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv4_4_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv4_4") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv4_4/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))) + .set_name("pool4") + // Layer 5 + << ConvolutionLayer( + 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_1_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_1_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv5_1") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv5_1/Relu") + << ConvolutionLayer( + 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_2_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_2_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv5_2") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv5_2/Relu") + << ConvolutionLayer( + 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_3_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_3_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv5_3") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv5_3/Relu") + << ConvolutionLayer( + 3U, 3U, 512U, get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_4_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/conv5_4_b.npy"), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv5_4") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv5_4/Relu") + << PoolingLayer(PoolingLayerInfo(PoolingType::MAX, 2, operation_layout, PadStrideInfo(2, 2, 0, 0))) + .set_name("pool5") + // Layer 6 + << FullyConnectedLayer(4096U, + get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc6_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc6_b.npy")) + .set_name("fc6") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Relu") + // Layer 7 + << FullyConnectedLayer(4096U, + get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc7_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc7_b.npy")) + .set_name("fc7") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("Relu_1") + // Layer 8 + << FullyConnectedLayer(1000U, + get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc8_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/vgg19_model/fc8_b.npy")) + .set_name("fc8") + // Softmax + << 
SoftmaxLayer().set_name("prob") << OutputLayer(get_output_accessor(common_params, 5)); // Finalize graph GraphConfig config; - config.num_threads = common_params.threads; - config.use_tuner = common_params.enable_tuner; - config.tuner_mode = common_params.tuner_mode; - config.tuner_file = common_params.tuner_file; - config.convert_to_uint8 = (common_params.data_type == DataType::QASYMM8); + config.num_threads = common_params.threads; + config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; + config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; + config.use_synthetic_type = arm_compute::is_data_type_quantized(common_params.data_type); + config.synthetic_type = common_params.data_type; graph.finalize(common_params.target, config); diff --git a/examples/graph_vgg_vdsr.cpp b/examples/graph_vgg_vdsr.cpp index c308236f5b..a6cd337f82 100644 --- a/examples/graph_vgg_vdsr.cpp +++ b/examples/graph_vgg_vdsr.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2020 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. */ #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -36,8 +37,7 @@ using namespace arm_compute::graph_utils; class GraphVDSRExample : public Example { public: - GraphVDSRExample() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "VDSR") + GraphVDSRExample() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "VDSR") { model_input_width = cmd_parser.add_option<SimpleOption<unsigned int>>("image-width", 192); model_input_height = cmd_parser.add_option<SimpleOption<unsigned int>>("image-height", 192); @@ -46,7 +46,7 @@ public: model_input_width->set_help("Input image width."); model_input_height->set_help("Input image height."); } - GraphVDSRExample(const GraphVDSRExample &) = delete; + GraphVDSRExample(const GraphVDSRExample &) = delete; GraphVDSRExample &operator=(const GraphVDSRExample &) = delete; ~GraphVDSRExample() override = default; bool do_setup(int argc, char **argv) override @@ -59,7 +59,7 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; @@ -79,18 +79,20 @@ public: const std::string model_path = "/cnn_data/vdsr_model/"; // Create a preprocessor object - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<TFPreproccessor>(); + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<TFPreproccessor>(); // Create input descriptor - const TensorShape tensor_shape = permute_shape(TensorShape(image_width, image_height, 1U, 1U), DataLayout::NCHW, common_params.data_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(image_width, image_height, 1U, common_params.batches), DataLayout::NCHW, + common_params.data_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NCHW; // Note: Quantization info are random and used only for benchmarking purposes - graph << common_params.target - << common_params.fast_math_hint + 
graph << common_params.target << common_params.fast_math_hint << InputLayer(input_descriptor.set_quantization_info(QuantizationInfo(0.0078125f, 128)), get_input_accessor(common_params, std::move(preprocessor), false)); @@ -98,49 +100,48 @@ public: SubStream right(graph); // Layer 1 - right << ConvolutionLayer( - 3U, 3U, 64U, - get_weights_accessor(data_path, "conv0_w.npy", weights_layout), - get_weights_accessor(data_path, "conv0_b.npy"), - PadStrideInfo(1, 1, 1, 1), 1, QuantizationInfo(0.031778190285f, 156), QuantizationInfo(0.0784313753247f, 128)) - .set_name("conv0") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv0/Relu"); + right << ConvolutionLayer(3U, 3U, 64U, get_weights_accessor(data_path, "conv0_w.npy", weights_layout), + get_weights_accessor(data_path, "conv0_b.npy"), PadStrideInfo(1, 1, 1, 1), 1, + QuantizationInfo(0.031778190285f, 156), QuantizationInfo(0.0784313753247f, 128)) + .set_name("conv0") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv0/Relu"); // Rest 17 layers - for(unsigned int i = 1; i < 19; ++i) + for (unsigned int i = 1; i < 19; ++i) { const std::string conv_w_path = "conv" + arm_compute::support::cpp11::to_string(i) + "_w.npy"; const std::string conv_b_path = "conv" + arm_compute::support::cpp11::to_string(i) + "_b.npy"; const std::string conv_name = "conv" + arm_compute::support::cpp11::to_string(i); - right << ConvolutionLayer( - 3U, 3U, 64U, - get_weights_accessor(data_path, conv_w_path, weights_layout), - get_weights_accessor(data_path, conv_b_path), - PadStrideInfo(1, 1, 1, 1), 1, QuantizationInfo(0.015851572156f, 93)) - .set_name(conv_name) - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name(conv_name + "/Relu"); + right << ConvolutionLayer(3U, 3U, 64U, get_weights_accessor(data_path, conv_w_path, weights_layout), + get_weights_accessor(data_path, conv_b_path), PadStrideInfo(1, 1, 1, 1), 1, + QuantizationInfo(0.015851572156f, 93)) + .set_name(conv_name) + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name(conv_name + "/Relu"); } // Final layer - right << ConvolutionLayer( - 3U, 3U, 1U, - get_weights_accessor(data_path, "conv20_w.npy", weights_layout), - get_weights_accessor(data_path, "conv20_b.npy"), - PadStrideInfo(1, 1, 1, 1), 1, QuantizationInfo(0.015851572156f, 93)) - .set_name("conv20") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)).set_name("conv20/Relu"); + right << ConvolutionLayer(3U, 3U, 1U, get_weights_accessor(data_path, "conv20_w.npy", weights_layout), + get_weights_accessor(data_path, "conv20_b.npy"), PadStrideInfo(1, 1, 1, 1), 1, + QuantizationInfo(0.015851572156f, 93)) + .set_name("conv20") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)) + .set_name("conv20/Relu"); // Add residual to input graph << EltwiseLayer(std::move(left), std::move(right), EltwiseOperation::Add).set_name("add") - << OutputLayer(arm_compute::support::cpp14::make_unique<DummyAccessor>(0)); + << OutputLayer(std::make_unique<DummyAccessor>(0)); // Finalize graph GraphConfig config; - config.num_threads = common_params.threads; - config.use_tuner = common_params.enable_tuner; - config.tuner_mode = common_params.tuner_mode; - config.tuner_file = common_params.tuner_file; - config.convert_to_uint8 = (common_params.data_type == DataType::QASYMM8); + config.num_threads = 
common_params.threads; + config.use_tuner = common_params.enable_tuner; + config.tuner_mode = common_params.tuner_mode; + config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; + config.use_synthetic_type = arm_compute::is_data_type_quantized(common_params.data_type); + config.synthetic_type = common_params.data_type; graph.finalize(common_params.target, config); @@ -155,8 +156,8 @@ public: private: CommandLineParser cmd_parser; CommonGraphOptions common_opts; - SimpleOption<unsigned int> *model_input_width{ nullptr }; - SimpleOption<unsigned int> *model_input_height{ nullptr }; + SimpleOption<unsigned int> *model_input_width{nullptr}; + SimpleOption<unsigned int> *model_input_height{nullptr}; CommonGraphParams common_params; Stream graph; }; diff --git a/examples/graph_yolov3.cpp b/examples/graph_yolov3.cpp index bbc6b729d1..5c8d3426ec 100644 --- a/examples/graph_yolov3.cpp +++ b/examples/graph_yolov3.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,6 +22,7 @@ * SOFTWARE. */ #include "arm_compute/graph.h" + #include "support/ToolchainSupport.h" #include "utils/CommonGraphOptions.h" #include "utils/GraphUtils.h" @@ -35,8 +36,7 @@ using namespace arm_compute::graph_utils; class GraphYOLOv3Example : public Example { public: - GraphYOLOv3Example() - : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "YOLOv3") + GraphYOLOv3Example() : cmd_parser(), common_opts(cmd_parser), common_params(), graph(0, "YOLOv3") { } @@ -50,14 +50,15 @@ public: common_params = consume_common_graph_parameters(common_opts); // Return when help menu is requested - if(common_params.help) + if (common_params.help) { cmd_parser.print_help(argv[0]); return false; } // Checks - ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), "QASYMM8 not supported for this graph"); + ARM_COMPUTE_EXIT_ON_MSG(arm_compute::is_data_type_quantized_asymmetric(common_params.data_type), + "QASYMM8 not supported for this graph"); // Print parameter values std::cout << common_params << std::endl; @@ -66,334 +67,325 @@ public: std::string data_path = common_params.data_path; // Create a preprocessor object - std::unique_ptr<IPreprocessor> preprocessor = arm_compute::support::cpp14::make_unique<TFPreproccessor>(0.f); + std::unique_ptr<IPreprocessor> preprocessor = std::make_unique<TFPreproccessor>(0.f); // Create input descriptor - const TensorShape tensor_shape = permute_shape(TensorShape(608U, 608U, 3U, 1U), DataLayout::NCHW, common_params.data_layout); - TensorDescriptor input_descriptor = TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); + const TensorShape tensor_shape = + permute_shape(TensorShape(608U, 608U, 3U, 1U), DataLayout::NCHW, common_params.data_layout); + TensorDescriptor input_descriptor = + TensorDescriptor(tensor_shape, common_params.data_type).set_layout(common_params.data_layout); // Set weights trained layout const DataLayout weights_layout = DataLayout::NCHW; - graph << common_params.target - << common_params.fast_math_hint + graph << common_params.target << common_params.fast_math_hint << InputLayer(input_descriptor, get_input_accessor(common_params, std::move(preprocessor), false)); std::pair<SubStream, SubStream> intermediate_layers = darknet53(data_path, weights_layout); - graph << ConvolutionLayer( - 1U, 1U, 512U, - get_weights_accessor(data_path, 
"/cnn_data/yolov3_model/conv2d_53_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv2d_53") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_53_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_53_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_53_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_53_beta.npy"), - 0.000001f) - .set_name("conv2d_53/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_53/LeakyRelu") - << ConvolutionLayer( - 3U, 3U, 1024U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_54_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv2d_54") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_54_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_54_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_54_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_54_beta.npy"), - 0.000001f) - .set_name("conv2d_54/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_54/LeakyRelu") - << ConvolutionLayer( - 1U, 1U, 512U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_55_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv2d_55") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_55_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_55_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_55_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_55_beta.npy"), - 0.000001f) - .set_name("conv2d_55/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_55/LeakyRelu") - << ConvolutionLayer( - 3U, 3U, 1024U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_56_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv2d_56") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_56_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_56_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_56_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_56_beta.npy"), - 0.000001f) - .set_name("conv2d_56/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_56/LeakyRelu") - << ConvolutionLayer( - 1U, 1U, 512U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_57_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv2d_57") - << BatchNormalizationLayer( - 
get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_57_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_57_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_57_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_57_beta.npy"), - 0.000001f) - .set_name("conv2d_57/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_57/LeakyRelu"); + graph + << ConvolutionLayer( + 1U, 1U, 512U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_53_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("conv2d_53") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_53_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_53_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_53_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_53_beta.npy"), 0.000001f) + .set_name("conv2d_53/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_53/LeakyRelu") + << ConvolutionLayer( + 3U, 3U, 1024U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_54_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv2d_54") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_54_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_54_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_54_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_54_beta.npy"), 0.000001f) + .set_name("conv2d_54/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_54/LeakyRelu") + << ConvolutionLayer( + 1U, 1U, 512U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_55_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("conv2d_55") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_55_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_55_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_55_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_55_beta.npy"), 0.000001f) + .set_name("conv2d_55/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_55/LeakyRelu") + << ConvolutionLayer( + 3U, 3U, 1024U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_56_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv2d_56") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_56_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_56_var.npy"), + 
get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_56_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_56_beta.npy"), 0.000001f) + .set_name("conv2d_56/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_56/LeakyRelu") + << ConvolutionLayer( + 1U, 1U, 512U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_57_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("conv2d_57") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_57_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_57_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_57_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_57_beta.npy"), 0.000001f) + .set_name("conv2d_57/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_57/LeakyRelu"); SubStream route_1(graph); - graph << ConvolutionLayer( - 3U, 3U, 1024U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_58_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv2d_58") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_58_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_58_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_58_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_58_beta.npy"), - 0.000001f) - .set_name("conv2d_58/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_58/LeakyRelu") - << ConvolutionLayer( - 1U, 1U, 255U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_59_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_59_b.npy", weights_layout), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv2d_59") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 1.f)).set_name("conv2d_59/Linear") - << YOLOLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.1f), 80).set_name("Yolo1") - << OutputLayer(get_output_accessor(common_params, 5)); + graph + << ConvolutionLayer( + 3U, 3U, 1024U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_58_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv2d_58") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_58_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_58_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_58_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_58_beta.npy"), 0.000001f) + .set_name("conv2d_58/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_58/LeakyRelu") + << ConvolutionLayer( + 1U, 1U, 255U, + 
get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_59_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_59_b.npy", weights_layout), + PadStrideInfo(1, 1, 0, 0)) + .set_name("conv2d_59") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 1.f)) + .set_name("conv2d_59/Linear") + << YOLOLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.1f)).set_name("Yolo1") + << OutputLayer(get_output_accessor(common_params, 5)); route_1 << ConvolutionLayer( - 1U, 1U, 256U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_60_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv2d_60") + 1U, 1U, 256U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_60_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("conv2d_60") << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_59_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_59_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_59_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_59_beta.npy"), - 0.000001f) - .set_name("conv2d_59/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_60/LeakyRelu") - << UpsampleLayer(Size2D(2, 2), InterpolationPolicy::NEAREST_NEIGHBOR).set_name("Upsample_60"); + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_59_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_59_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_59_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_59_beta.npy"), + 0.000001f) + .set_name("conv2d_59/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_60/LeakyRelu") + << ResizeLayer(InterpolationPolicy::NEAREST_NEIGHBOR, 2, 2).set_name("Upsample_60"); SubStream concat_1(route_1); - concat_1 << ConcatLayer(std::move(route_1), std::move(intermediate_layers.second)).set_name("Route1") - << ConvolutionLayer( - 1U, 1U, 256U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_61_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv2d_61") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_60_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_60_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_60_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_60_beta.npy"), - 0.000001f) - .set_name("conv2d_60/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_61/LeakyRelu") - << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_62_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - 
.set_name("conv2d_62") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_61_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_61_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_61_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_61_beta.npy"), - 0.000001f) - .set_name("conv2d_61/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_62/LeakyRelu") - << ConvolutionLayer( - 1U, 1U, 256U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_63_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv2d_63") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_62_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_62_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_62_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_62_beta.npy"), - 0.000001f) - .set_name("conv2d_62/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_63/LeakyRelu") - << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_64_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv2d_64") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_63_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_63_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_63_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_63_beta.npy"), - 0.000001f) - .set_name("conv2d_63/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_64/LeakyRelu") - << ConvolutionLayer( - 1U, 1U, 256U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_65_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv2d_65") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_64_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_64_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_64_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_64_beta.npy"), - 0.000001f) - .set_name("conv2d_65/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_65/LeakyRelu"); + concat_1 + << ConcatLayer(std::move(route_1), std::move(intermediate_layers.second)).set_name("Route1") + << ConvolutionLayer( + 1U, 1U, 256U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_61_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("conv2d_61") + << BatchNormalizationLayer( + get_weights_accessor(data_path, 
"/cnn_data/yolov3_model/batch_normalization_60_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_60_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_60_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_60_beta.npy"), 0.000001f) + .set_name("conv2d_60/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_61/LeakyRelu") + << ConvolutionLayer( + 3U, 3U, 512U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_62_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv2d_62") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_61_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_61_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_61_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_61_beta.npy"), 0.000001f) + .set_name("conv2d_61/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_62/LeakyRelu") + << ConvolutionLayer( + 1U, 1U, 256U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_63_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("conv2d_63") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_62_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_62_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_62_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_62_beta.npy"), 0.000001f) + .set_name("conv2d_62/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_63/LeakyRelu") + << ConvolutionLayer( + 3U, 3U, 512U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_64_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv2d_64") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_63_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_63_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_63_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_63_beta.npy"), 0.000001f) + .set_name("conv2d_63/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_64/LeakyRelu") + << ConvolutionLayer( + 1U, 1U, 256U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_65_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("conv2d_65") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_64_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_64_var.npy"), + get_weights_accessor(data_path, 
"/cnn_data/yolov3_model/batch_normalization_64_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_64_beta.npy"), 0.000001f) + .set_name("conv2d_65/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_65/LeakyRelu"); SubStream route_2(concat_1); - concat_1 << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_66_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv2d_66") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_65_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_65_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_65_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_65_beta.npy"), - 0.000001f) - .set_name("conv2d_65/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_66/LeakyRelu") - << ConvolutionLayer( - 1U, 1U, 255U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_67_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_67_b.npy", weights_layout), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv2d_67") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 1.f)).set_name("conv2d_67/Linear") - << YOLOLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.1f), 80).set_name("Yolo2") - << OutputLayer(get_output_accessor(common_params, 5)); + concat_1 + << ConvolutionLayer( + 3U, 3U, 512U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_66_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv2d_66") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_65_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_65_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_65_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_65_beta.npy"), 0.000001f) + .set_name("conv2d_65/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_66/LeakyRelu") + << ConvolutionLayer( + 1U, 1U, 255U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_67_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_67_b.npy", weights_layout), + PadStrideInfo(1, 1, 0, 0)) + .set_name("conv2d_67") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 1.f)) + .set_name("conv2d_67/Linear") + << YOLOLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.1f)).set_name("Yolo2") + << OutputLayer(get_output_accessor(common_params, 5)); route_2 << ConvolutionLayer( - 1U, 1U, 128U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_68_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv2d_68") + 1U, 1U, 128U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_68_w.npy", 
weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("conv2d_68") << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_66_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_66_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_66_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_66_beta.npy"), - 0.000001f) - .set_name("conv2d_66/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_68/LeakyRelu") - << UpsampleLayer(Size2D(2, 2), InterpolationPolicy::NEAREST_NEIGHBOR).set_name("Upsample_68"); + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_66_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_66_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_66_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_66_beta.npy"), + 0.000001f) + .set_name("conv2d_66/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_68/LeakyRelu") + << ResizeLayer(InterpolationPolicy::NEAREST_NEIGHBOR, 2, 2).set_name("Upsample_68"); SubStream concat_2(route_2); - concat_2 << ConcatLayer(std::move(route_2), std::move(intermediate_layers.first)).set_name("Route2") - << ConvolutionLayer( - 1U, 1U, 128U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_69_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv2d_69") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_67_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_67_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_67_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_67_beta.npy"), - 0.000001f) - .set_name("conv2d_67/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_69/LeakyRelu") - << ConvolutionLayer( - 3U, 3U, 256U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_70_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv2d_70") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_68_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_68_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_68_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_68_beta.npy"), - 0.000001f) - .set_name("conv2d_68/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_70/LeakyRelu") - << ConvolutionLayer( - 1U, 1U, 128U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_71_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv2d_71") - << BatchNormalizationLayer( - 
get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_69_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_69_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_69_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_69_beta.npy"), - 0.000001f) - .set_name("conv2d_69/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_71/LeakyRelu") - << ConvolutionLayer( - 3U, 3U, 256U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_72_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv2d_72") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_70_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_70_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_70_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_70_beta.npy"), - 0.000001f) - .set_name("conv2d_70/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_72/LeakyRelu") - << ConvolutionLayer( - 1U, 1U, 128U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_73_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv2d_73") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_71_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_71_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_71_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_71_beta.npy"), - 0.000001f) - .set_name("conv2d_71/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_73/LeakyRelu") - << ConvolutionLayer( - 3U, 3U, 256U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_74_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv2d_74") - << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_72_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_72_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_72_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_72_beta.npy"), - 0.000001f) - .set_name("conv2d_72/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_74/LeakyRelu") - << ConvolutionLayer( - 1U, 1U, 255U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_75_w.npy", weights_layout), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_75_b.npy", weights_layout), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv2d_75") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 1.f)).set_name("conv2d_75/Linear") - << YOLOLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.1f), 
80).set_name("Yolo3") - << OutputLayer(get_output_accessor(common_params, 5)); + concat_2 + << ConcatLayer(std::move(route_2), std::move(intermediate_layers.first)).set_name("Route2") + << ConvolutionLayer( + 1U, 1U, 128U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_69_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("conv2d_69") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_67_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_67_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_67_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_67_beta.npy"), 0.000001f) + .set_name("conv2d_67/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_69/LeakyRelu") + << ConvolutionLayer( + 3U, 3U, 256U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_70_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv2d_70") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_68_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_68_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_68_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_68_beta.npy"), 0.000001f) + .set_name("conv2d_68/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_70/LeakyRelu") + << ConvolutionLayer( + 1U, 1U, 128U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_71_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("conv2d_71") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_69_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_69_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_69_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_69_beta.npy"), 0.000001f) + .set_name("conv2d_69/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_71/LeakyRelu") + << ConvolutionLayer( + 3U, 3U, 256U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_72_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv2d_72") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_70_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_70_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_70_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_70_beta.npy"), 0.000001f) + .set_name("conv2d_70/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_72/LeakyRelu") + << ConvolutionLayer( + 1U, 1U, 128U, 
+ get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_73_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("conv2d_73") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_71_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_71_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_71_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_71_beta.npy"), 0.000001f) + .set_name("conv2d_71/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_73/LeakyRelu") + << ConvolutionLayer( + 3U, 3U, 256U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_74_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv2d_74") + << BatchNormalizationLayer( + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_72_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_72_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_72_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_72_beta.npy"), 0.000001f) + .set_name("conv2d_72/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_74/LeakyRelu") + << ConvolutionLayer( + 1U, 1U, 255U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_75_w.npy", weights_layout), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_75_b.npy", weights_layout), + PadStrideInfo(1, 1, 0, 0)) + .set_name("conv2d_75") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LINEAR, 1.f)) + .set_name("conv2d_75/Linear") + << YOLOLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LOGISTIC, 0.1f)).set_name("Yolo3") + << OutputLayer(get_output_accessor(common_params, 5)); // Finalize graph GraphConfig config; @@ -401,6 +393,7 @@ public: config.use_tuner = common_params.enable_tuner; config.tuner_mode = common_params.tuner_mode; config.tuner_file = common_params.tuner_file; + config.mlgo_file = common_params.mlgo_file; graph.finalize(common_params.target, config); @@ -421,64 +414,64 @@ private: std::pair<SubStream, SubStream> darknet53(const std::string &data_path, DataLayout weights_layout) { graph << ConvolutionLayer( - 3U, 3U, 32U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_1_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv2d_1/Conv2D") + 3U, 3U, 32U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_1_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv2d_1/Conv2D") << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_1_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_1_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_1_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_1_beta.npy"), - 0.000001f) - 
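Throughout darknet53 the convolutions pass an empty std::unique_ptr<ITensorAccessor> where a bias accessor would go: the convolution is bias-free because a BatchNormalizationLayer follows immediately, and LeakyReLU(0.1) closes each unit. A sketch of that recurring conv/batch-norm/leaky-ReLU unit; add_conv_bn_leaky is a hypothetical helper, and the conv2d_1 hunk resumes below:

    #include <memory>
    #include <string>
    #include "arm_compute/graph.h"
    #include "utils/GraphUtils.h"

    using namespace arm_compute;
    using namespace arm_compute::graph::frontend;
    using namespace arm_compute::graph_utils;

    void add_conv_bn_leaky(Stream &graph, const std::string &data_path, const std::string &id,
                           unsigned int ofm, PadStrideInfo info, DataLayout weights_layout)
    {
        const std::string total_path = "/cnn_data/yolov3_model/";
        graph << ConvolutionLayer(3U, 3U, ofm,
                                  get_weights_accessor(data_path, total_path + "conv2d_" + id + "_w.npy", weights_layout),
                                  std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), // no bias accessor
                                  info)
                     .set_name("conv2d_" + id + "/Conv2D")
              << BatchNormalizationLayer(
                     get_weights_accessor(data_path, total_path + "batch_normalization_" + id + "_mean.npy"),
                     get_weights_accessor(data_path, total_path + "batch_normalization_" + id + "_var.npy"),
                     get_weights_accessor(data_path, total_path + "batch_normalization_" + id + "_gamma.npy"),
                     get_weights_accessor(data_path, total_path + "batch_normalization_" + id + "_beta.npy"),
                     0.000001f)
                     .set_name("conv2d_" + id + "/BatchNorm")
              << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f))
                     .set_name("conv2d_" + id + "/LeakyRelu");
    }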
.set_name("conv2d_1/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_1/LeakyRelu") + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_1_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_1_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_1_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_1_beta.npy"), + 0.000001f) + .set_name("conv2d_1/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_1/LeakyRelu") << ConvolutionLayer( - 3U, 3U, 64U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_2_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 1, 1)) - .set_name("conv2d_2/Conv2D") + 3U, 3U, 64U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_2_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 1, 1)) + .set_name("conv2d_2/Conv2D") << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_2_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_2_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_2_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_2_beta.npy"), - 0.000001f) - .set_name("conv2d_2/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_2/LeakyRelu"); + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_2_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_2_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_2_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_2_beta.npy"), + 0.000001f) + .set_name("conv2d_2/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_2/LeakyRelu"); darknet53_block(data_path, "3", weights_layout, 32U); graph << ConvolutionLayer( - 3U, 3U, 128U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_5_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 1, 1)) - .set_name("conv2d_5/Conv2D") + 3U, 3U, 128U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_5_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 1, 1)) + .set_name("conv2d_5/Conv2D") << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_5_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_5_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_5_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_5_beta.npy"), - 0.000001f) - .set_name("conv2d_5/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_5/LeakyRelu"); + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_5_mean.npy"), + 
get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_5_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_5_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_5_beta.npy"), + 0.000001f) + .set_name("conv2d_5/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_5/LeakyRelu"); darknet53_block(data_path, "6", weights_layout, 64U); darknet53_block(data_path, "8", weights_layout, 64U); graph << ConvolutionLayer( - 3U, 3U, 256U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_10_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 1, 1)) - .set_name("conv2d_10/Conv2D") + 3U, 3U, 256U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_10_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 1, 1)) + .set_name("conv2d_10/Conv2D") << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_10_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_10_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_10_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_10_beta.npy"), - 0.000001f) - .set_name("conv2d_10/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_10/LeakyRelu"); + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_10_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_10_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_10_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_10_beta.npy"), + 0.000001f) + .set_name("conv2d_10/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_10/LeakyRelu"); darknet53_block(data_path, "11", weights_layout, 128U); darknet53_block(data_path, "13", weights_layout, 128U); darknet53_block(data_path, "15", weights_layout, 128U); @@ -489,19 +482,19 @@ private: darknet53_block(data_path, "25", weights_layout, 128U); SubStream layer_36(graph); graph << ConvolutionLayer( - 3U, 3U, 512U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_27_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 1, 1)) - .set_name("conv2d_27/Conv2D") + 3U, 3U, 512U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_27_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 1, 1)) + .set_name("conv2d_27/Conv2D") << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_27_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_27_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_27_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_27_beta.npy"), - 0.000001f) - .set_name("conv2d_27/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 
0.1f)).set_name("conv2d_27/LeakyRelu"); + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_27_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_27_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_27_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_27_beta.npy"), + 0.000001f) + .set_name("conv2d_27/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_27/LeakyRelu"); darknet53_block(data_path, "28", weights_layout, 256U); darknet53_block(data_path, "30", weights_layout, 256U); darknet53_block(data_path, "32", weights_layout, 256U); @@ -512,19 +505,19 @@ private: darknet53_block(data_path, "42", weights_layout, 256U); SubStream layer_61(graph); graph << ConvolutionLayer( - 3U, 3U, 1024U, - get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_44_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(2, 2, 1, 1)) - .set_name("conv2d_44/Conv2D") + 3U, 3U, 1024U, + get_weights_accessor(data_path, "/cnn_data/yolov3_model/conv2d_44_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(2, 2, 1, 1)) + .set_name("conv2d_44/Conv2D") << BatchNormalizationLayer( - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_44_mean.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_44_var.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_44_gamma.npy"), - get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_44_beta.npy"), - 0.000001f) - .set_name("conv2d_44/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_44/LeakyRelu"); + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_44_mean.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_44_var.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_44_gamma.npy"), + get_weights_accessor(data_path, "/cnn_data/yolov3_model/batch_normalization_44_beta.npy"), + 0.000001f) + .set_name("conv2d_44/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_44/LeakyRelu"); darknet53_block(data_path, "45", weights_layout, 512U); darknet53_block(data_path, "47", weights_layout, 512U); darknet53_block(data_path, "49", weights_layout, 512U); @@ -533,43 +526,48 @@ private: return std::pair<SubStream, SubStream>(layer_36, layer_61); } - void darknet53_block(const std::string &data_path, std::string &¶m_path, DataLayout weights_layout, - unsigned int filter_size) + void darknet53_block(const std::string &data_path, + std::string &¶m_path, + DataLayout weights_layout, + unsigned int filter_size) { - std::string total_path = "/cnn_data/yolov3_model/"; - std::string param_path2 = arm_compute::support::cpp11::to_string(arm_compute::support::cpp11::stoi(param_path) + 1); - SubStream i_a(graph); - SubStream i_b(graph); + std::string total_path = "/cnn_data/yolov3_model/"; + std::string param_path2 = + arm_compute::support::cpp11::to_string(arm_compute::support::cpp11::stoi(param_path) + 1); + SubStream i_a(graph); + SubStream i_b(graph); i_a << ConvolutionLayer( - 1U, 1U, filter_size, - 
get_weights_accessor(data_path, total_path + "conv2d_" + param_path + "_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 0, 0)) - .set_name("conv2d_" + param_path + "/Conv2D") + 1U, 1U, filter_size, + get_weights_accessor(data_path, total_path + "conv2d_" + param_path + "_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 0, 0)) + .set_name("conv2d_" + param_path + "/Conv2D") << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "batch_normalization_" + param_path + "_mean.npy"), - get_weights_accessor(data_path, total_path + "batch_normalization_" + param_path + "_var.npy"), - get_weights_accessor(data_path, total_path + "batch_normalization_" + param_path + "_gamma.npy"), - get_weights_accessor(data_path, total_path + "batch_normalization_" + param_path + "_beta.npy"), - 0.000001f) - .set_name("conv2d_" + param_path + "/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_" + param_path + "/LeakyRelu") + get_weights_accessor(data_path, total_path + "batch_normalization_" + param_path + "_mean.npy"), + get_weights_accessor(data_path, total_path + "batch_normalization_" + param_path + "_var.npy"), + get_weights_accessor(data_path, total_path + "batch_normalization_" + param_path + "_gamma.npy"), + get_weights_accessor(data_path, total_path + "batch_normalization_" + param_path + "_beta.npy"), + 0.000001f) + .set_name("conv2d_" + param_path + "/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_" + param_path + "/LeakyRelu") << ConvolutionLayer( - 3U, 3U, filter_size * 2, - get_weights_accessor(data_path, total_path + "conv2d_" + param_path2 + "_w.npy", weights_layout), - std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), - PadStrideInfo(1, 1, 1, 1)) - .set_name("conv2d_" + param_path2 + "/Conv2D") + 3U, 3U, filter_size * 2, + get_weights_accessor(data_path, total_path + "conv2d_" + param_path2 + "_w.npy", weights_layout), + std::unique_ptr<arm_compute::graph::ITensorAccessor>(nullptr), PadStrideInfo(1, 1, 1, 1)) + .set_name("conv2d_" + param_path2 + "/Conv2D") << BatchNormalizationLayer( - get_weights_accessor(data_path, total_path + "batch_normalization_" + param_path2 + "_mean.npy"), - get_weights_accessor(data_path, total_path + "batch_normalization_" + param_path2 + "_var.npy"), - get_weights_accessor(data_path, total_path + "batch_normalization_" + param_path2 + "_gamma.npy"), - get_weights_accessor(data_path, total_path + "batch_normalization_" + param_path2 + "_beta.npy"), - 0.000001f) - .set_name("conv2d_" + param_path2 + "/BatchNorm") - << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)).set_name("conv2d_" + param_path2 + "/LeakyRelu"); + get_weights_accessor(data_path, total_path + "batch_normalization_" + param_path2 + "_mean.npy"), + get_weights_accessor(data_path, total_path + "batch_normalization_" + param_path2 + "_var.npy"), + get_weights_accessor(data_path, total_path + "batch_normalization_" + param_path2 + "_gamma.npy"), + get_weights_accessor(data_path, total_path + "batch_normalization_" + param_path2 + "_beta.npy"), + 0.000001f) + .set_name("conv2d_" + param_path2 + "/BatchNorm") + << ActivationLayer(ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::LEAKY_RELU, 0.1f)) + .set_name("conv2d_" + param_path2 
+ "/LeakyRelu"); - graph << EltwiseLayer(std::move(i_a), std::move(i_b), EltwiseOperation::Add).set_name("").set_name("add_" + param_path + "_" + param_path2); + graph << EltwiseLayer(std::move(i_a), std::move(i_b), EltwiseOperation::Add) + .set_name("") + .set_name("add_" + param_path + "_" + param_path2); } }; diff --git a/examples/neon_cartoon_effect.cpp b/examples/neon_cartoon_effect.cpp deleted file mode 100644 index 4285aa41e3..0000000000 --- a/examples/neon_cartoon_effect.cpp +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright (c) 2017-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "arm_compute/runtime/NEON/NEFunctions.h" - -#include "arm_compute/core/Types.h" -#include "utils/ImageLoader.h" -#include "utils/Utils.h" - -using namespace arm_compute; -using namespace utils; - -class NEONCartoonEffectExample : public Example -{ -public: - bool do_setup(int argc, char **argv) override - { - // Open PPM file - PPMLoader ppm; - - if(argc < 2) - { - // Print help - std::cout << "Usage: ./build/neon_cartoon_effect [input_image.ppm]\n\n"; - std::cout << "No input_image provided, creating a dummy 640x480 image\n"; - // Create an empty grayscale 640x480 image - src_img.allocator()->init(TensorInfo(640, 480, Format::U8)); - } - else - { - ppm.open(argv[1]); - ppm.init_image(src_img, Format::U8); - } - - // Initialize just the dimensions and format of the images: - gaus5x5_img.allocator()->init(*src_img.info()); - canny_edge_img.allocator()->init(*src_img.info()); - dst_img.allocator()->init(*src_img.info()); - - // Configure the functions to call - gaus5x5.configure(&src_img, &gaus5x5_img, BorderMode::REPLICATE); - canny_edge.configure(&src_img, &canny_edge_img, 100, 80, 3, 1, BorderMode::REPLICATE); - sub.configure(&gaus5x5_img, &canny_edge_img, &dst_img, ConvertPolicy::SATURATE); - - // Now that the padding requirements are known we can allocate the images: - src_img.allocator()->allocate(); - dst_img.allocator()->allocate(); - gaus5x5_img.allocator()->allocate(); - canny_edge_img.allocator()->allocate(); - - // Fill the input image with the content of the PPM image if a filename was provided: - if(ppm.is_open()) - { - ppm.fill_image(src_img); - output_filename = std::string(argv[1]) + "_out.ppm"; - } - - return true; - } - - void do_run() override - { - // Execute the functions: - gaus5x5.run(); - canny_edge.run(); - sub.run(); - } - - void do_teardown() override - { - // Save the result to file: 
- if(!output_filename.empty()) - { - save_to_ppm(dst_img, output_filename); // save_to_ppm maps and unmaps the image to store as PPM - } - } - -private: - Image src_img{}, dst_img{}, gaus5x5_img{}, canny_edge_img{}; - NEGaussian5x5 gaus5x5{}; - NECannyEdge canny_edge{}; - NEArithmeticSubtraction sub{}; - std::string output_filename{}; -}; - -/** Main program for cartoon effect test - * - * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Path to PPM image to process ) - */ -int main(int argc, char **argv) -{ - return utils::run_example<NEONCartoonEffectExample>(argc, argv); -} diff --git a/examples/neon_cnn.cpp b/examples/neon_cnn.cpp index ee6f46d28b..1f7a1ea6ca 100644 --- a/examples/neon_cnn.cpp +++ b/examples/neon_cnn.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2020 ARM Limited. + * Copyright (c) 2016-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,13 +21,13 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#include "arm_compute/runtime/NEON/NEFunctions.h" - #include "arm_compute/core/Types.h" #include "arm_compute/runtime/Allocator.h" #include "arm_compute/runtime/BlobLifetimeManager.h" #include "arm_compute/runtime/MemoryManagerOnDemand.h" +#include "arm_compute/runtime/NEON/NEFunctions.h" #include "arm_compute/runtime/PoolManager.h" + #include "utils/Utils.h" using namespace arm_compute; @@ -43,20 +43,21 @@ public: // Create memory manager components // We need 2 memory managers: 1 for handling the tensors within the functions (mm_layers) and 1 for handling the input and output tensors of the functions (mm_transitions)) - auto lifetime_mgr0 = std::make_shared<BlobLifetimeManager>(); // Create lifetime manager - auto lifetime_mgr1 = std::make_shared<BlobLifetimeManager>(); // Create lifetime manager - auto pool_mgr0 = std::make_shared<PoolManager>(); // Create pool manager - auto pool_mgr1 = std::make_shared<PoolManager>(); // Create pool manager - auto mm_layers = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr0, pool_mgr0); // Create the memory manager - auto mm_transitions = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr1, pool_mgr1); // Create the memory manager + auto lifetime_mgr0 = std::make_shared<BlobLifetimeManager>(); // Create lifetime manager + auto lifetime_mgr1 = std::make_shared<BlobLifetimeManager>(); // Create lifetime manager + auto pool_mgr0 = std::make_shared<PoolManager>(); // Create pool manager + auto pool_mgr1 = std::make_shared<PoolManager>(); // Create pool manager + auto mm_layers = std::make_shared<MemoryManagerOnDemand>(lifetime_mgr0, pool_mgr0); // Create the memory manager + auto mm_transitions = + std::make_shared<MemoryManagerOnDemand>(lifetime_mgr1, pool_mgr1); // Create the memory manager // The weights and biases tensors should be initialized with the values inferred with the training // Set memory manager where allowed to manage internal memory requirements - conv0 = arm_compute::support::cpp14::make_unique<NEConvolutionLayer>(mm_layers); - conv1 = arm_compute::support::cpp14::make_unique<NEConvolutionLayer>(mm_layers); - fc0 = arm_compute::support::cpp14::make_unique<NEFullyConnectedLayer>(mm_layers); - softmax = arm_compute::support::cpp14::make_unique<NESoftmaxLayer>(mm_layers); + conv0 = std::make_unique<NEConvolutionLayer>(mm_layers); + conv1 = std::make_unique<NEConvolutionLayer>(mm_layers); + fc0 = std::make_unique<NEFullyConnectedLayer>(mm_layers); + softmax = std::make_unique<NESoftmaxLayer>(mm_layers); /* [Initialize tensors] */ @@ 
-116,7 +117,8 @@ public: // Initialize tensor of fc0 constexpr unsigned int num_labels = 128; - const TensorShape weights_shape_fc0(out_shape_pool1.x() * out_shape_pool1.y() * out_shape_pool1.z(), num_labels); + const TensorShape weights_shape_fc0(out_shape_pool1.x() * out_shape_pool1.y() * out_shape_pool1.z(), + num_labels); const TensorShape biases_shape_fc0(num_labels); const TensorShape out_shape_fc0(num_labels); @@ -138,22 +140,28 @@ public: /* [Configure functions] */ // in:32x32x1: 5x5 convolution, 8 output features maps (OFM) - conv0->configure(&src, &weights0, &biases0, &out_conv0, PadStrideInfo(1 /* stride_x */, 1 /* stride_y */, 2 /* pad_x */, 2 /* pad_y */)); + conv0->configure(&src, &weights0, &biases0, &out_conv0, + PadStrideInfo(1 /* stride_x */, 1 /* stride_y */, 2 /* pad_x */, 2 /* pad_y */)); // in:32x32x8, out:32x32x8, Activation function: relu act0.configure(&out_conv0, &out_act0, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); // in:32x32x8, out:16x16x8 (2x2 pooling), Pool type function: Max - pool0.configure(&out_act0, &out_pool0, PoolingLayerInfo(PoolingType::MAX, 2, data_layout, PadStrideInfo(2 /* stride_x */, 2 /* stride_y */))); + pool0.configure( + &out_act0, &out_pool0, + PoolingLayerInfo(PoolingType::MAX, 2, data_layout, PadStrideInfo(2 /* stride_x */, 2 /* stride_y */))); // in:16x16x8: 3x3 convolution, 16 output features maps (OFM) - conv1->configure(&out_pool0, &weights1, &biases1, &out_conv1, PadStrideInfo(1 /* stride_x */, 1 /* stride_y */, 1 /* pad_x */, 1 /* pad_y */)); + conv1->configure(&out_pool0, &weights1, &biases1, &out_conv1, + PadStrideInfo(1 /* stride_x */, 1 /* stride_y */, 1 /* pad_x */, 1 /* pad_y */)); // in:16x16x16, out:16x16x16, Activation function: relu act1.configure(&out_conv1, &out_act1, ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU)); // in:16x16x16, out:8x8x16 (2x2 pooling), Pool type function: Average - pool1.configure(&out_act1, &out_pool1, PoolingLayerInfo(PoolingType::AVG, 2, data_layout, PadStrideInfo(2 /* stride_x */, 2 /* stride_y */))); + pool1.configure( + &out_act1, &out_pool1, + PoolingLayerInfo(PoolingType::AVG, 2, data_layout, PadStrideInfo(2 /* stride_x */, 2 /* stride_y */))); // in:8x8x16, out:128 fc0->configure(&out_pool1, &weights2, &biases2, &out_fc0); @@ -170,8 +178,8 @@ public: // We need 2 memory groups for handling the input and output // We call explicitly allocate after manage() in order to avoid overlapping lifetimes - memory_group0 = arm_compute::support::cpp14::make_unique<MemoryGroup>(mm_transitions); - memory_group1 = arm_compute::support::cpp14::make_unique<MemoryGroup>(mm_transitions); + memory_group0 = std::make_unique<MemoryGroup>(mm_transitions); + memory_group1 = std::make_unique<MemoryGroup>(mm_transitions); memory_group0->manage(&out_conv0); out_conv0.allocator()->allocate(); @@ -257,7 +265,7 @@ private: Tensor out_fc0{}; Tensor out_softmax{}; - // NEON allocator + // Allocator Allocator allocator{}; // Memory groups diff --git a/examples/neon_convolution.cpp b/examples/neon_convolution.cpp deleted file mode 100644 index 56b4ddc0be..0000000000 --- a/examples/neon_convolution.cpp +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2016-2019 ARM Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "arm_compute/runtime/NEON/NEFunctions.h" - -#include "arm_compute/core/Types.h" -#include "utils/ImageLoader.h" -#include "utils/Utils.h" - -using namespace arm_compute; -using namespace utils; - -/** Gaussian 3x3 matrix - */ -const std::array<int16_t, 9> gaussian3x3 = -{ - 1, 2, 1, - 2, 4, 2, - 1, 2, 1 -}; - -/** Gaussian 5x5 matrix - */ -const std::array<int16_t, 25> gaussian5x5 = -{ - 1, 4, 6, 4, 1, - 4, 16, 24, 16, 4, - 6, 24, 36, 24, 6, - 4, 16, 24, 16, 4, - 1, 4, 6, 4, 1 -}; - -class NEONConvolutionExample : public Example -{ -public: - bool do_setup(int argc, char **argv) override - { - /** [Accurate padding] **/ - PPMLoader ppm; - - if(argc < 2) - { - // Print help - std::cout << "Usage: ./build/neon_convolution [input_image.ppm]\n\n"; - std::cout << "No input_image provided, creating a dummy 640x480 image\n"; - // Initialize just the dimensions and format of your buffers: - src.allocator()->init(TensorInfo(640, 480, Format::U8)); - } - else - { - ppm.open(argv[1]); - // Initialize just the dimensions and format of your buffers: - ppm.init_image(src, Format::U8); - } - - // Initialize just the dimensions and format of the temporary and destination images: - tmp.allocator()->init(*src.info()); - dst.allocator()->init(*src.info()); - - // Apply a Gaussian 3x3 filter to the source image followed by a Gaussian 5x5: - // The function will automatically update the padding information inside input and output to match its requirements - conv3x3.configure(&src, &tmp, gaussian3x3.data(), 0 /* Let arm_compute calculate the scale */, BorderMode::UNDEFINED); - conv5x5.configure(&tmp, &dst, gaussian5x5.data(), 0 /* Let arm_compute calculate the scale */, BorderMode::UNDEFINED); - - // Now that the padding requirements are known we can allocate the images: - src.allocator()->allocate(); - tmp.allocator()->allocate(); - dst.allocator()->allocate(); - - // Fill the input image with the content of the PPM image if a filename was provided: - if(ppm.is_open()) - { - ppm.fill_image(src); - output_filename = std::string(argv[1]) + "_out.ppm"; - } - /** [Accurate padding] **/ - - return true; - } - void do_run() override - { - //Execute the functions: - conv3x3.run(); - conv5x5.run(); - } - void do_teardown() override - { - // Save the result to file: - if(!output_filename.empty()) - { - save_to_ppm(dst, output_filename); // save_to_ppm maps and unmaps the image to store as PPM - } 
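The setup comments in the example deleted here still describe the workflow every surviving example follows: initialise tensors with dimensions and format only, let each configure() call record its padding requirements in the TensorInfo, and allocate backing memory last. A minimal sketch of that ordering against the current NEScale API (the ScaleKernelInfo form used in the neon_scale.cpp hunk further down; the concrete sizes here are illustrative, not from any example):

    Image src{}, dst{};
    src.allocator()->init(TensorInfo(640, 480, Format::U8)); // shape/format only, no memory yet
    dst.allocator()->init(TensorInfo(320, 240, Format::U8)); // half-size output for the scale below

    NEScale scale{};
    // configure() may extend the padding recorded in the tensors' TensorInfo...
    scale.configure(&src, &dst,
                    ScaleKernelInfo{InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::UNDEFINED, PixelValue(),
                                    SamplingPolicy::CENTER, false});

    // ...so backing memory is only allocated once every consumer has been configured:
    src.allocator()->allocate();
    dst.allocator()->allocate();

Allocating first would fix the padding prematurely, which is why these examples defer allocate() to the end of do_setup().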
- } - -private: - Image src{}, tmp{}, dst{}; - NEConvolution3x3 conv3x3{}; - NEConvolution5x5 conv5x5{}; - std::string output_filename{}; -}; - -/** Main program for convolution test - * - * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Path to PPM image to process ) - */ -int main(int argc, char **argv) -{ - return utils::run_example<NEONConvolutionExample>(argc, argv); -} diff --git a/examples/neon_copy_objects.cpp b/examples/neon_copy_objects.cpp index 84a2abd379..6e9ebcaad5 100644 --- a/examples/neon_copy_objects.cpp +++ b/examples/neon_copy_objects.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 ARM Limited. + * Copyright (c) 2016-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,9 +22,9 @@ * SOFTWARE. */ +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/NEFunctions.h" -#include "arm_compute/core/Types.h" #include "utils/Utils.h" #include <cstring> @@ -49,12 +49,12 @@ public: src_data = new float[width * height * batch]; dst_data = new float[width * height * batch]; - // Fill src_data with dummy values: - for(unsigned int b = 0; b < batch; b++) + // Fill src_data with pseudo(meaningless) values: + for (unsigned int b = 0; b < batch; b++) { - for(unsigned int h = 0; h < height; h++) + for (unsigned int h = 0; h < height; h++) { - for(unsigned int w = 0; w < width; w++) + for (unsigned int w = 0; w < width; w++) { src_data[b * (width * height) + h * width + w] = static_cast<float>(100 * b + 10 * h + w); } @@ -78,9 +78,12 @@ public: Window input_window; input_window.use_tensor_dimensions(input.info()->tensor_shape()); std::cout << " Dimensions of the input's iterator:\n"; - std::cout << " X = [start=" << input_window.x().start() << ", end=" << input_window.x().end() << ", step=" << input_window.x().step() << "]\n"; - std::cout << " Y = [start=" << input_window.y().start() << ", end=" << input_window.y().end() << ", step=" << input_window.y().step() << "]\n"; - std::cout << " Z = [start=" << input_window.z().start() << ", end=" << input_window.z().end() << ", step=" << input_window.z().step() << "]\n"; + std::cout << " X = [start=" << input_window.x().start() << ", end=" << input_window.x().end() + << ", step=" << input_window.x().step() << "]\n"; + std::cout << " Y = [start=" << input_window.y().start() << ", end=" << input_window.y().end() + << ", step=" << input_window.y().step() << "]\n"; + std::cout << " Z = [start=" << input_window.z().start() << ", end=" << input_window.z().end() + << ", step=" << input_window.z().step() << "]\n"; // Create an iterator: Iterator input_it(&input, input_window); @@ -98,20 +101,28 @@ public: // } // } // Except it works for an arbitrary number of dimensions - execute_window_loop(input_window, [&](const Coordinates & id) - { - std::cout << "Setting item [" << id.x() << "," << id.y() << "," << id.z() << "]\n"; - *reinterpret_cast<float *>(input_it.ptr()) = src_data[id.z() * (width * height) + id.y() * width + id.x()]; - }, - input_it); + execute_window_loop( + input_window, + [&](const Coordinates &id) + { + std::cout << "Setting item [" << id.x() << "," << id.y() << "," << id.z() << "]\n"; + *reinterpret_cast<float *>(input_it.ptr()) = + src_data[id.z() * (width * height) + id.y() * width + id.x()]; + }, + input_it); // More efficient way: create an iterator to iterate through each row (instead of each element) of the output tensor: Window output_window; - output_window.use_tensor_dimensions(output.info()->tensor_shape(), /* first_dimension =*/Window::DimY); // Iterate through the rows 
(not each element) + output_window.use_tensor_dimensions( + output.info()->tensor_shape(), + /* first_dimension =*/Window::DimY); // Iterate through the rows (not each element) std::cout << " Dimensions of the output's iterator:\n"; - std::cout << " X = [start=" << output_window.x().start() << ", end=" << output_window.x().end() << ", step=" << output_window.x().step() << "]\n"; - std::cout << " Y = [start=" << output_window.y().start() << ", end=" << output_window.y().end() << ", step=" << output_window.y().step() << "]\n"; - std::cout << " Z = [start=" << output_window.z().start() << ", end=" << output_window.z().end() << ", step=" << output_window.z().step() << "]\n"; + std::cout << " X = [start=" << output_window.x().start() << ", end=" << output_window.x().end() + << ", step=" << output_window.x().step() << "]\n"; + std::cout << " Y = [start=" << output_window.y().start() << ", end=" << output_window.y().end() + << ", step=" << output_window.y().step() << "]\n"; + std::cout << " Z = [start=" << output_window.z().start() << ", end=" << output_window.z().end() + << ", step=" << output_window.z().step() << "]\n"; // Create an iterator: Iterator output_it(&output, output_window); @@ -126,13 +137,15 @@ public: // } // } // Except it works for an arbitrary number of dimensions - execute_window_loop(output_window, [&](const Coordinates & id) - { - std::cout << "Copying one row starting from [" << id.x() << "," << id.y() << "," << id.z() << "]\n"; - // Copy one whole row: - memcpy(dst_data + id.z() * (width * height) + id.y() * width, output_it.ptr(), width * sizeof(float)); - }, - output_it); + execute_window_loop( + output_window, + [&](const Coordinates &id) + { + std::cout << "Copying one row starting from [" << id.x() << "," << id.y() << "," << id.z() << "]\n"; + // Copy one whole row: + memcpy(dst_data + id.z() * (width * height) + id.y() * width, output_it.ptr(), width * sizeof(float)); + }, + output_it); /** [Copy objects example] */ @@ -140,7 +153,7 @@ public: } void do_run() override { - // Run NEON softmax: + // Run softmax: softmax.run(); } void do_teardown() override diff --git a/examples/neon_gemm_qasymm8.cpp b/examples/neon_gemm_qasymm8.cpp index f028e004c2..3aaad02f8a 100644 --- a/examples/neon_gemm_qasymm8.cpp +++ b/examples/neon_gemm_qasymm8.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2020 ARM Limited. + * Copyright (c) 2020-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,12 +22,13 @@ * SOFTWARE. 
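The neon_copy_objects.cpp hunks above are formatting churn around execute_window_loop, but they show the Window/Iterator API at its tersest: a Window spans the tensor's dimensions, an Iterator binds a tensor to that window, and the lambda runs once per element with its Coordinates. A self-contained sketch of the same pattern (t is assumed to be an already-allocated F32 Tensor; the fill value is arbitrary):

    Window win;
    win.use_tensor_dimensions(t.info()->tensor_shape()); // one window dimension per tensor dimension
    Iterator it(&t, win);
    execute_window_loop(
        win,
        [&](const Coordinates &id)
        {
            // id carries the current element's coordinates; it.ptr() points at its storage
            *reinterpret_cast<float *>(it.ptr()) = static_cast<float>(id.x() + id.y());
        },
        it);

Passing Window::DimY as the first_dimension argument of use_tensor_dimensions(), as the output loop above does, collapses the X dimension so the lambda is invoked once per row instead of once per element.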
*/ #include "arm_compute/core/Types.h" -#include "arm_compute/core/WindowIterator.h" #include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "arm_compute/core/WindowIterator.h" #include "arm_compute/runtime/NEON/NEFunctions.h" #include "arm_compute/runtime/NEON/NEScheduler.h" -#include "utils/Utils.h" + #include "support/ToolchainSupport.h" +#include "utils/Utils.h" #include <cstdlib> @@ -38,7 +39,7 @@ using namespace utils; void find_min_max(int size, const float *data, float *min, float *max) { *min = *max = data[0]; - for(int i = 0; i < size; i++) + for (int i = 0; i < size; i++) { const float val = data[i]; *min = std::min(*min, val); @@ -66,11 +67,11 @@ QuantizationInfo choose_quantization_params(float min, float max) // But we need to nudge the zero_point to an integer (exact quantized value) std::uint8_t zero_point_nudged = 0; - if(zero_point_real < qmin) + if (zero_point_real < qmin) { zero_point_nudged = qmin; } - else if(zero_point_real > qmax) + else if (zero_point_real > qmax) { zero_point_nudged = qmax; } @@ -85,7 +86,7 @@ QuantizationInfo choose_quantization_params(float min, float max) void quantize_values(int size, qasymm8_t *output, float *input, const QuantizationInfo qinfo) { - for(int i = 0; i < size; i++) + for (int i = 0; i < size; i++) { output[i] = quantize_qasymm8(input[i], qinfo); } @@ -102,13 +103,13 @@ int main(int argc, char **argv) Tensor q_dst0; Tensor q_res; Tensor q_res_output; - size_t M = 4; - size_t N = 4; - size_t K = 4; - bool default_input = true; + size_t M = 4; + size_t N = 4; + size_t K = 4; + bool default_input = true; // Parse args - if(argc < 3) /* case default matrix sizes */ + if (argc < 3) /* case default matrix sizes */ { // Print help std::cout << "Usage: ./build/neon_gemm_qasymm8 M N K\n"; @@ -144,20 +145,23 @@ int main(int argc, char **argv) // Fill in: one is the identity matrix, other is sequential values // src1: Identity matrix - for(size_t i = 0; i < M * K; i++) { + for (size_t i = 0; i < M * K; i++) + { src1_ptr[i] = 0; } - for(size_t i = 0; i < M; i++) { + for (size_t i = 0; i < M; i++) + { src1_ptr[i * K + i] = 1.0f; } // src2: Sequential values matrix - for(size_t i = 0; i < K * N; i++) { + for (size_t i = 0; i < K * N; i++) + { src2_ptr[i] = i * 1.123f; } // Otherwise if M, N, K is given, fill in with random values - if(!default_input) + if (!default_input) { fill_random_tensor(src1, 0.f, 1.f); fill_random_tensor(src2, 0.f, 1.f); @@ -217,13 +221,22 @@ int main(int argc, char **argv) qgemm.configure(&q_src1, &q_src2, nullptr, &q_res); // Configure output stage after computing shift and multiplier parameters - NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint gemmlowp_output_stage; - int output_multiplier; - int output_shift; + NEGEMMLowpOutputStage gemmlowp_output_stage; + int output_multiplier; + int output_shift; float multiplier = (src1_qinfo.uniform().scale * src2_qinfo.uniform().scale) / dst0_qinfo.uniform().scale; quantization::calculate_quantized_multiplier_less_than_one(multiplier, &output_multiplier, &output_shift); std::cout << "(q_multiplier, q_shift) = (" << output_multiplier << ", " << output_shift << ")\n\n"; - gemmlowp_output_stage.configure(&q_res, nullptr, &q_res_output, output_multiplier, output_shift, dst0_qinfo.uniform().offset); + + GEMMLowpOutputStageInfo info; + info.type = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT; + info.gemmlowp_multiplier = output_multiplier; + info.gemmlowp_shift = output_shift; + info.gemmlowp_offset = dst0_qinfo.uniform().offset; + info.output_data_type = 
DataType::QASYMM8; + q_res_output.info()->set_data_type(DataType::QASYMM8); + q_res_output.info()->set_num_channels(1); + gemmlowp_output_stage.configure(&q_res, nullptr, &q_res_output, info); // Allocate all tensors q_src1.allocator()->allocate(); @@ -240,7 +253,7 @@ int main(int argc, char **argv) qgemm.run(); // Run output stage kernel gemmlowp_output_stage.run(); - std::cout << "Done\n"; + std::cout << "\nTest Passed\n"; #if ARM_COMPUTE_DEBUG_ENABLED // Print quantized source matrices diff --git a/examples/neon_gemm_s8_f32.cpp b/examples/neon_gemm_s8_f32.cpp new file mode 100644 index 0000000000..7c1497ec41 --- /dev/null +++ b/examples/neon_gemm_s8_f32.cpp @@ -0,0 +1,239 @@ +/* + * Copyright (c) 2020-2021, 2024 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
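The neon_gemm_qasymm8.cpp hunk above is a real API migration, not just reformatting: the dedicated NEGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint function is replaced by the generic NEGEMMLowpOutputStage, with the requantisation parameters gathered into a GEMMLowpOutputStageInfo descriptor instead of being passed positionally to configure(). Condensed from the diff:

    GEMMLowpOutputStageInfo info;
    info.type                = GEMMLowpOutputStageType::QUANTIZE_DOWN_FIXEDPOINT;
    info.gemmlowp_multiplier = output_multiplier; // from calculate_quantized_multiplier_less_than_one()
    info.gemmlowp_shift      = output_shift;
    info.gemmlowp_offset     = dst0_qinfo.uniform().offset;
    info.output_data_type    = DataType::QASYMM8;

    NEGEMMLowpOutputStage output_stage;
    output_stage.configure(&q_res, nullptr, &q_res_output, info); // int32 accumulators -> QASYMM8

One descriptor now covers every output-stage flavour, which is why the output data type must be stated explicitly both in the info struct and on q_res_output's TensorInfo.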
+ */ +#include "arm_compute/core/Types.h" +#include "arm_compute/core/utils/quantization/AsymmHelpers.h" +#include "arm_compute/core/WindowIterator.h" +#include "arm_compute/runtime/NEON/NEFunctions.h" +#include "arm_compute/runtime/NEON/NEScheduler.h" + +#include "support/ToolchainSupport.h" +#include "utils/Utils.h" + +#include <cstdlib> + +using namespace arm_compute; +using namespace utils; + +QuantizationInfo dynamic_qinfo(QuantizationInfo qinfo) +{ + return QuantizationInfo(qinfo.scale(), qinfo.offset(), true); +} +void set_qinfo_dynamic(Tensor &t) +{ + t.info()->set_quantization_info(dynamic_qinfo(t.info()->quantization_info())); +} + +void quantize(Tensor &qt, const Tensor &t, float min, float max) +{ + DataType dt = DataType::QASYMM8_SIGNED; + + // Determine the scale + const float scale = (max - min) / 256.0f; + + // Determine the zero-point; using affine equation val = (qval-zerop) * scale + const float zero_point = -128.0f - min / scale; + + QuantizationInfo qinfo(scale, (int32_t)round(zero_point), true); + + // We now have the quantisation info and can configure the quantised tensor + qt.allocator()->init(TensorInfo(t.info()->tensor_shape(), 1, dt, qinfo)); + qt.allocator()->allocate(); + NEQuantizationLayer quantization; + quantization.configure(&t, &qt); + quantization.run(); +} + +void invert_qinfo_offset(Tensor &t) +{ + QuantizationInfo qinfo = t.info()->quantization_info(); + t.info()->set_quantization_info(QuantizationInfo(qinfo.scale()[0], -qinfo.offset()[0], qinfo.is_dynamic())); +} + +void print_quantization_info(const Tensor &t, const std::string &name_prefix) +{ + QuantizationInfo qinfo = t.info()->quantization_info(); + std::cout << name_prefix << "_qinfo=" + << "QuantizationInfo(" << qinfo.scale()[0] << ", " << qinfo.offset()[0] << ")\n"; +} + +int main(int argc, char **argv) +{ + size_t M = 4; + size_t N = 4; + size_t K = 4; + + // Parse args + if (argc < 3) /* case default matrix sizes */ + { + // Print help + std::cout << "Usage: ./build/neon_gemm_s8_f32 M N K\n"; + std::cout << "Too few or no inputs provided.
Using default M=4, N=4, K=4\n\n"; + } + else /* case M N K arguments provided */ + { + M = strtol(argv[1], nullptr, 10); + N = strtol(argv[2], nullptr, 10); + K = strtol(argv[3], nullptr, 10); + } + + /*** Floating point matrix multiplication ***/ + + // Initialise input matrices + NEGEMM fgemm{}; + + Tensor src1; + Tensor src2; + Tensor dst; + src1.allocator()->init(TensorInfo(TensorShape(K, M), 1, DataType::F32)); + src2.allocator()->init(TensorInfo(TensorShape(N, K), 1, DataType::F32)); + dst.allocator()->init(TensorInfo(TensorShape(N, M), 1, DataType::F32)); + fgemm.configure(&src1, &src2, nullptr, &dst, 1, 0); + + // Allocate matrices + src1.allocator()->allocate(); + src2.allocator()->allocate(); + dst.allocator()->allocate(); + + float min1 = 0.0f; + float max1 = 1.0f; + fill_random_tensor(src1, 0, min1, max1); + + float min2 = -1.0f; + float max2 = 2.0f; + fill_random_tensor(src2, 1, min2, max2); + + // Run single precision gemm and print result + fgemm.run(); + +#if ARM_COMPUTE_DEBUG_ENABLED + std::cout << "# F32 GEMM result:\n"; + std::cout << "src1=[ \n"; + src1.print(std::cout); + std::cout << "] \n"; + std::cout << "src2=[ \n"; + src2.print(std::cout); + std::cout << "] \n"; + std::cout << "dst=[ \n"; + dst.print(std::cout); + std::cout << "] \n"; +#endif // ARM_COMPUTE_DEBUG_ENABLED + + Tensor q_src1; + quantize(q_src1, src1, min1, max1); + print_quantization_info(q_src1, "src1"); + q_src1.info()->set_are_values_constant(false); + + // NEGEMMLowpMatrixMultiplyCore adopts the opposite convention for the offset + // compared to NEQuantizeLayer + invert_qinfo_offset(q_src1); + + Tensor q_src2; + quantize(q_src2, src2, min2, max2); + print_quantization_info(q_src2, "src2"); + q_src2.info()->set_are_values_constant(false); + + // NEGEMMLowpMatrixMultiplyCore adopts the opposite convention for the offset + // compared to NEQuantizeLayer + invert_qinfo_offset(q_src2); + + // q_dst will be Dequantized to F32 so it doesn't need a QuantizationInfo + Tensor q_dst; + q_dst.allocator()->init(TensorInfo(TensorShape(N, M), 1, DataType::F32)); + + // Configure low precision gemm and initialise result tensor (pre-output) + NEGEMMLowpMatrixMultiplyCore qgemm; + qgemm.configure(&q_src1, &q_src2, nullptr, &q_dst); + + q_dst.allocator()->allocate(); + + // Run low precision matrix multiply kernel + qgemm.run(); + +#if ARM_COMPUTE_DEBUG_ENABLED + // Print quantized source matrices + std::cout << "q_src1=[ \n"; + q_src1.print(std::cout); + std::cout << "] \n"; + std::cout << "q_src2=[ \n"; + q_src2.print(std::cout); + std::cout << "] \n"; + std::cout << "# Lowp GEMM output (FP32):\n"; + std::cout << "q_dst=[ \n"; + q_dst.print(std::cout); + std::cout << "] \n"; + + // Expected result + std::cout << "# Expected result:\n"; + std::cout << "dst=[ \n"; + dst.print(std::cout); + std::cout << "] \n"; +#endif // ARM_COMPUTE_DEBUG_ENABLED + + // Rerun to test the ability to modify the Tensor contents and QuantizationInfo (dynamic quantization) + min1 = -1.0f; + max1 = 1.0f; + fill_random_tensor(src1, 2, min1, max1); + +#if ARM_COMPUTE_DEBUG_ENABLED + std::cout << "# Refilled src1\n"; + std::cout << "src1=[ \n"; + src1.print(std::cout); + std::cout << "] \n"; + std::cout << "src2=[ \n"; + src2.print(std::cout); + std::cout << "] \n"; +#endif // ARM_COMPUTE_DEBUG_ENABLED + + fgemm.run(); + + quantize(q_src1, src1, min1, max1); + set_qinfo_dynamic(q_src1); + print_quantization_info(q_src1, "src1"); + + // NEGEMMLowpMatrixMultiplyCore adopts the opposite convention for the offset + // compared to 
NEQuantizeLayer + invert_qinfo_offset(q_src1); + + qgemm.run(); + +#if ARM_COMPUTE_DEBUG_ENABLED + // Print quantized source matrices + std::cout << "q_src1=[ \n"; + q_src1.print(std::cout); + std::cout << "] \n"; + std::cout << "q_src2=[ \n"; + q_src2.print(std::cout); + std::cout << "] \n"; + std::cout << "# Lowp GEMM output (FP32):\n"; + std::cout << "q_dst=[ \n"; + q_dst.print(std::cout); + std::cout << "] \n"; + + // Expected result + std::cout << "# Expected result:\n"; + std::cout << "dst=[ \n"; + dst.print(std::cout); + std::cout << "] \n"; +#endif // ARM_COMPUTE_DEBUG_ENABLED +} diff --git a/examples/neon_opticalflow.cpp b/examples/neon_opticalflow.cpp deleted file mode 100644 index b5df819e97..0000000000 --- a/examples/neon_opticalflow.cpp +++ /dev/null @@ -1,263 +0,0 @@ -/* - * Copyright (c) 2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
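The quantize() helper in the new neon_gemm_s8_f32.cpp above is worth a worked number. For the first input range it sees (min1 = 0.0f, max1 = 1.0f, from the fill_random_tensor call), the affine parameters come out as:

    const float scale      = (1.0f - 0.0f) / 256.0f; // 0.00390625
    const float zero_point = -128.0f - 0.0f / scale; // -128
    // val = (qval - zero_point) * scale, so 0.0f maps to qval -128 and
    // 1.0f maps to qval 128, which saturates to 127 in QASYMM8_SIGNED:
    // dividing the range by 256 rather than 255 costs one step at the very top.

The third constructor argument of the QuantizationInfo built there marks the info as dynamic, which is what lets the example refill src1, requantise it, and rerun the already-configured NEGEMMLowpMatrixMultiplyCore without reconfiguring.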
- */ -#include "arm_compute/runtime/NEON/NEFunctions.h" - -#include "arm_compute/core/Types.h" -#include "utils/ImageLoader.h" -#include "utils/Utils.h" - -#include <fstream> -#include <sstream> -#include <vector> - -using namespace arm_compute; -using namespace utils; - -class NeonOpticalFlowExample : public Example -{ -public: - NeonOpticalFlowExample() - : input_points(100), output_points(100), point_estimates(100) - { - } - - bool do_setup(int argc, char **argv) override - { - if(argc < 5) - { - // Print help - std::cout << "Usage: ./build/neon_opticalflow [src_1st.ppm] [src_2nd.ppm] [keypoints] [estimates]\n\n"; - const unsigned int img_width = 64; - const unsigned int img_height = 64; - const unsigned int rect_x = 20; - const unsigned int rect_y = 40; - const unsigned int rect_s = 8; - const unsigned int offsetx = 24; - const unsigned int offsety = 3; - std::cout << "No input_image provided, creating test data:\n"; - std::cout << "\t Image src_1st = (" << img_width << "," << img_height << ")" << std::endl; - std::cout << "\t Image src_2nd = (" << img_width << "," << img_height << ")" << std::endl; - init_img(src_1st, img_width, img_height, rect_x, rect_y, rect_s); - init_img(src_2nd, img_width, img_height, rect_x + offsetx, rect_y + offsety, rect_s); - const int num_points = 4; - input_points.resize(num_points); - point_estimates.resize(num_points); - const std::array<unsigned int, num_points> tracking_coordsx = { rect_x - 1, rect_x, rect_x + 1, rect_x + 2 }; - const std::array<unsigned int, num_points> tracking_coordsy = { rect_y - 1, rect_y, rect_y + 1, rect_y + 2 }; - const std::array<unsigned int, num_points> estimate_coordsx = { rect_x + offsetx - 1, rect_x + offsetx, rect_x + offsetx + 1, rect_x + offsetx + 2 }; - const std::array<unsigned int, num_points> estimate_coordsy = { rect_y + offsety - 1, rect_y + offsety, rect_y + offsety + 1, rect_y + offsety + 2 }; - - for(int k = 0; k < num_points; ++k) - { - auto &keypoint = input_points.at(k); - keypoint.x = tracking_coordsx[k]; - keypoint.y = tracking_coordsy[k]; - keypoint.tracking_status = 1; - } - for(int k = 0; k < num_points; ++k) - { - auto &keypoint = point_estimates.at(k); - keypoint.x = estimate_coordsx[k]; - keypoint.y = estimate_coordsy[k]; - keypoint.tracking_status = 1; - } - } - else - { - load_ppm(argv[1], src_1st); - load_ppm(argv[2], src_2nd); - load_keypoints(argv[3], input_points); - load_keypoints(argv[4], point_estimates); - } - - print_points(input_points, "Tracking points : "); - print_points(point_estimates, "Estimates points : "); - - const unsigned int num_levels = 3; - // Initialise and allocate pyramids - PyramidInfo pyramid_info(num_levels, SCALE_PYRAMID_HALF, src_1st.info()->tensor_shape(), src_1st.info()->format()); - pyr_1st.init_auto_padding(pyramid_info); - pyr_2nd.init_auto_padding(pyramid_info); - - pyrf_1st.configure(&src_1st, &pyr_1st, BorderMode::UNDEFINED, 0); - pyrf_2nd.configure(&src_2nd, &pyr_2nd, BorderMode::UNDEFINED, 0); - - output_points.resize(input_points.num_values()); - - optkf.configure(&pyr_1st, &pyr_2nd, - &input_points, &point_estimates, &output_points, - Termination::TERM_CRITERIA_BOTH, 0.01f, 15, 5, true, BorderMode::UNDEFINED, 0); - - pyr_1st.allocate(); - pyr_2nd.allocate(); - - return true; - } - void do_run() override - { - //Execute the functions: - pyrf_1st.run(); - pyrf_2nd.run(); - optkf.run(); - } - void do_teardown() override - { - print_points(output_points, "Output points : "); - } - -private: - /** Loads the input keypoints from a file into an array - * - * 
@param[in] fn Filename containing the keypoints. Each line must have two values X Y. - * @param[out] img Reference to an unintialised KeyPointArray - */ - bool load_keypoints(const std::string &fn, KeyPointArray &array) - { - assert(!fn.empty()); - std::ifstream f(fn); - if(f.is_open()) - { - std::cout << "Reading points from " << fn << std::endl; - std::vector<KeyPoint> v; - for(std::string line; std::getline(f, line);) - { - std::stringstream ss(line); - std::string xcoord; - std::string ycoord; - getline(ss, xcoord, ' '); - getline(ss, ycoord, ' '); - KeyPoint kp; - kp.x = std::stoi(xcoord); - kp.y = std::stoi(ycoord); - kp.tracking_status = 1; - v.push_back(kp); - } - const int num_points = v.size(); - array.resize(num_points); - for(int k = 0; k < num_points; ++k) - { - auto &keypoint = array.at(k); - keypoint = v[k]; - } - return true; - } - else - { - std::cout << "Cannot open keypoints file " << fn << std::endl; - return false; - } - } - - /** Creates and Image and fills it with the ppm data from the file - * - * @param[in] fn PPM filename to be loaded - * @param[out] img Reference to an unintialised image instance - */ - bool load_ppm(const std::string &fn, Image &img) - { - assert(!fn.empty()); - PPMLoader ppm; - ppm.open(fn); - ppm.init_image(img, Format::U8); - img.allocator()->allocate(); - if(ppm.is_open()) - { - std::cout << "Reading image " << fn << std::endl; - ppm.fill_image(img); - return true; - } - else - { - std::cout << "Cannot open " << fn << std::endl; - return false; - } - } - /** Creates and Image and draws a square in the specified coordinares. - * - * @param[out] img Reference to an unintialised image instance - * @param[in] img_width Width of the image to be created - * @param[in] img_height Height of the image to be created - * @param[in] square_center_x Coordinate along x-axis to be used as the center for the square - * @param[in] square_center_y Coordinate along y-axis to be used as the center for the square - * @param[in] square_size Size in pixels to be used for the square - */ - void init_img(Image &img, unsigned int img_width, unsigned int img_height, - unsigned int square_center_x, unsigned int square_center_y, - unsigned int square_size) - { - img.allocator()->init(TensorInfo(img_width, img_height, Format::U8)); - img.allocator()->allocate(); - const unsigned int square_half = square_size / 2; - // assert the square is in the bounds of the image - assert(square_center_x > square_half && square_center_x + square_half < img_width); - assert(square_center_y > square_half && square_center_y + square_half < img_height); - // get ptr to the top left pixel for the squeare - std::fill(img.buffer(), img.buffer() + img_width * img_height, 0); - for(unsigned int i = 0; i < square_size; ++i) - { - for(unsigned int j = 0; j < square_size; ++j) - { - uint8_t *ptr = img.ptr_to_element(Coordinates(square_center_x - square_half + j, square_center_y - square_half + i)); - *ptr = 0xFF; - } - } - } - /** Prints an array of keypoints and an optional label - * - * @param[in] a Keypoint array to be printed - * @param[in] str Label to be printed before the array - */ - void print_points(const KeyPointArray &a, const std::string &str = "") - { - std::cout << str << std::endl; - for(unsigned int k = 0; k < a.num_values(); ++k) - { - auto kp = a.at(k); - std::cout << "\t " - << " (x,y) = (" << kp.x << "," << kp.y << ")"; - std::cout << " strength = " << kp.strength << " " - << " scale = " << kp.scale << " orientation " << kp.orientation << " status " << kp.tracking_status << " 
err = " << kp.error << std::endl; - } - } - - Pyramid pyr_1st{}; - Pyramid pyr_2nd{}; - NEGaussianPyramidHalf pyrf_1st{}; - NEGaussianPyramidHalf pyrf_2nd{}; - NEOpticalFlow optkf{}; - Image src_1st{}, src_2nd{}; - KeyPointArray input_points; - KeyPointArray output_points; - KeyPointArray point_estimates; -}; - -/** Main program for optical flow test - * - * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Path to PPM image to process ) - */ -int main(int argc, char **argv) -{ - return utils::run_example<NeonOpticalFlowExample>(argc, argv); -} diff --git a/examples/neon_permute.cpp b/examples/neon_permute.cpp index 05c8169020..76ba079430 100644 --- a/examples/neon_permute.cpp +++ b/examples/neon_permute.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2019 ARM Limited. + * Copyright (c) 2019 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,9 +21,9 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/NEFunctions.h" -#include "arm_compute/core/Types.h" #include "utils/Utils.h" using namespace arm_compute; @@ -85,11 +85,13 @@ private: window.use_tensor_dimensions(reference.info()->tensor_shape()); Iterator ref_it(&reference, window); Iterator res_it(&result, window); - execute_window_loop(window, [&](const Coordinates &) - { - assert(*reinterpret_cast<unsigned char *>(ref_it.ptr()) == *reinterpret_cast<unsigned char *>(res_it.ptr())); - }, - ref_it, res_it); + execute_window_loop( + window, + [&](const Coordinates &) { + assert(*reinterpret_cast<unsigned char *>(ref_it.ptr()) == + *reinterpret_cast<unsigned char *>(res_it.ptr())); + }, + ref_it, res_it); } void fill_tensor(Tensor &tensor) @@ -98,11 +100,9 @@ private: window.use_tensor_dimensions(tensor.info()->tensor_shape()); Iterator tensor_it(&tensor, window); unsigned char val(0); - execute_window_loop(window, [&](const Coordinates &) - { - *reinterpret_cast<unsigned char *>(tensor_it.ptr()) = val++; - }, - tensor_it); + execute_window_loop( + window, [&](const Coordinates &) { *reinterpret_cast<unsigned char *>(tensor_it.ptr()) = val++; }, + tensor_it); } void init_tensor(const TensorShape shape, Tensor &tensor, DataType type, DataLayout layout) { diff --git a/examples/neon_scale.cpp b/examples/neon_scale.cpp index b04d916aaf..28590bd861 100644 --- a/examples/neon_scale.cpp +++ b/examples/neon_scale.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 ARM Limited. + * Copyright (c) 2016-2020 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,9 +21,9 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ +#include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/NEFunctions.h" -#include "arm_compute/core/Types.h" #include "utils/ImageLoader.h" #include "utils/Utils.h" @@ -37,7 +37,7 @@ public: { PPMLoader ppm; - if(argc < 2) + if (argc < 2) { // Print help std::cout << "Usage: ./build/neon_scale[input_image.ppm]\n\n"; @@ -60,14 +60,16 @@ public: dst.allocator()->init(dst_tensor_info); // Configure Scale function object: - scale.configure(&src, &dst, InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::UNDEFINED); + scale.configure(&src, &dst, + ScaleKernelInfo{InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::UNDEFINED, PixelValue(), + SamplingPolicy::CENTER, false}); // Allocate all the images src.allocator()->allocate(); dst.allocator()->allocate(); // Fill the input image with the content of the PPM image if a filename was provided: - if(ppm.is_open()) + if (ppm.is_open()) { ppm.fill_image(src); output_filename = std::string(argv[1]) + "_out.ppm"; @@ -83,7 +85,7 @@ public: void do_teardown() override { // Save the result to file: - if(!output_filename.empty()) + if (!output_filename.empty()) { save_to_ppm(dst, output_filename); // save_to_ppm maps and unmaps the image to store as PPM } diff --git a/examples/neon_sgemm.cpp b/examples/neon_sgemm.cpp index 8f395dec9a..8cda65a400 100644 --- a/examples/neon_sgemm.cpp +++ b/examples/neon_sgemm.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018-2019 ARM Limited. + * Copyright (c) 2018-2019 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,6 +24,7 @@ #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/NEFunctions.h" #include "arm_compute/runtime/NEON/NEScheduler.h" + #include "utils/Utils.h" #include <cstdlib> @@ -43,15 +44,16 @@ public: beta = 0.0f; std::ifstream stream; - if(argc > 1) + if (argc > 1) { stream.open(argv[1], std::fstream::in); } - if(argc < 3 || (argc < 4 && stream.bad())) + if (argc < 3 || (argc < 4 && stream.bad())) { // Print help - std::cout << "Usage: 1) ./build/neon_sgemm input_matrix_1.npy input_matrix_2.npy [input_matrix_3.npy] [alpha = 1] [beta = 0]\n"; + std::cout << "Usage: 1) ./build/neon_sgemm input_matrix_1.npy input_matrix_2.npy [input_matrix_3.npy] " + "[alpha = 1] [beta = 0]\n"; std::cout << " 2) ./build/neon_sgemm M N K [alpha = 1.0f] [beta = 0.0f]\n\n"; std::cout << "Too few or no input_matrices provided. 
Using M=7, N=3, K=5, alpha=1.0f and beta=0.0f\n\n"; @@ -61,29 +63,29 @@ public: } else { - if(stream.good()) /* case file1.npy file2.npy [file3.npy] [alpha = 1.0f] [beta = 0.0f] */ + if (stream.good()) /* case file1.npy file2.npy [file3.npy] [alpha = 1.0f] [beta = 0.0f] */ { npy0.open(argv[1]); npy0.init_tensor(src0, DataType::F32); npy1.open(argv[2]); npy1.init_tensor(src1, DataType::F32); - if(argc > 3) + if (argc > 3) { stream.close(); stream.clear(); stream.open(argv[3], std::fstream::in); - if(stream.good()) /* case with third file */ + if (stream.good()) /* case with third file */ { npy2.open(argv[3]); npy2.init_tensor(src2, DataType::F32); - if(argc > 4) + if (argc > 4) { // Convert string to float alpha = strtof(argv[4], nullptr); - if(argc > 5) + if (argc > 5) { // Convert string to float beta = strtof(argv[5], nullptr); @@ -94,7 +96,7 @@ public: { alpha = strtof(argv[3], nullptr); - if(argc > 4) + if (argc > 4) { beta = strtof(argv[4], nullptr); } @@ -111,11 +113,11 @@ public: src1.allocator()->init(TensorInfo(TensorShape(N, K), 1, DataType::F32)); src2.allocator()->init(TensorInfo(TensorShape(N, M), 1, DataType::F32)); - if(argc > 4) + if (argc > 4) { alpha = strtof(argv[4], nullptr); - if(argc > 5) + if (argc > 5) { beta = strtof(argv[5], nullptr); } @@ -134,7 +136,7 @@ public: dst.allocator()->allocate(); // Fill the input images with either the data provided or random data - if(npy0.is_open()) + if (npy0.is_open()) { npy0.fill_tensor(src0); npy1.fill_tensor(src1); @@ -142,7 +144,7 @@ public: output_filename = "sgemm_out.npy"; is_fortran = npy0.is_fortran(); - if(npy2.is_open()) + if (npy2.is_open()) { src2.allocator()->allocate(); npy2.fill_tensor(src2); @@ -169,7 +171,7 @@ public: } void do_teardown() override { - if(!output_filename.empty()) /* Save to .npy file */ + if (!output_filename.empty()) /* Save to .npy file */ { save_to_npy(dst, output_filename, is_fortran); } diff --git a/examples/neoncl_scale_median_gaussian.cpp b/examples/neoncl_scale_median_gaussian.cpp deleted file mode 100644 index 1b26517d9f..0000000000 --- a/examples/neoncl_scale_median_gaussian.cpp +++ /dev/null @@ -1,141 +0,0 @@ -/* - * Copyright (c) 2016-2018 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef ARM_COMPUTE_CL /* Needed by Utils.cpp to handle OpenCL exceptions properly */ -#error "This example needs to be built with -DARM_COMPUTE_CL" -#endif /* ARM_COMPUTE_CL */ - -#include "arm_compute/core/Types.h" -#include "arm_compute/runtime/CL/CLFunctions.h" -#include "arm_compute/runtime/CL/CLScheduler.h" -#include "arm_compute/runtime/NEON/NEFunctions.h" -#include "utils/ImageLoader.h" -#include "utils/Utils.h" - -using namespace arm_compute; -using namespace utils; - -/** Example demonstrating how to use both CL and NEON functions in the same pipeline - * - * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Path to PPM image to process ) - */ -class NEONCLScaleMedianGaussianExample : public Example -{ -public: - bool do_setup(int argc, char **argv) override - { - /** [NEON / OpenCL Interop] */ - PPMLoader ppm; - - CLScheduler::get().default_init(); - - if(argc < 2) - { - // Print help - std::cout << "Usage: ./build/cl_convolution [input_image.ppm]\n\n"; - std::cout << "No input_image provided, creating a dummy 640x480 image\n"; - // Create an empty grayscale 640x480 image - src.allocator()->init(TensorInfo(640, 480, Format::U8)); - } - else - { - ppm.open(argv[1]); - ppm.init_image(src, Format::U8); - } - - TensorInfo scale_median_info(TensorInfo(src.info()->dimension(0) / 2, src.info()->dimension(1) / 2, Format::U8)); - - // Configure the temporary and destination images - scale_median.allocator()->init(scale_median_info); - median_gauss.allocator()->init(scale_median_info); - dst.allocator()->init(scale_median_info); - - scale.configure(&src, &scale_median, InterpolationPolicy::NEAREST_NEIGHBOR, BorderMode::REPLICATE); - median.configure(&scale_median, &median_gauss, BorderMode::REPLICATE); - gauss.configure(&median_gauss, &dst, BorderMode::REPLICATE); - - // Allocate all the images - src.allocator()->allocate(); - scale_median.allocator()->allocate(); - median_gauss.allocator()->allocate(); - dst.allocator()->allocate(); - - // Fill the input image with the content of the PPM image if a filename was provided: - if(ppm.is_open()) - { - ppm.fill_image(src); - const std::string output_filename = std::string(argv[1]) + "_out.ppm"; - } - /** [NEON / OpenCL Interop] */ - - return true; - } - void do_run() override - { - // Enqueue and flush the OpenCL kernel: - scale.run(); - - // Do a blocking map of the input and output buffers of the NEON function: - scale_median.map(); - median_gauss.map(); - - // Run the NEON function: - median.run(); - - // Unmap the output buffer before it's used again by OpenCL: - scale_median.unmap(); - median_gauss.unmap(); - - // Run the final OpenCL function: - gauss.run(); - - // Make sure all the OpenCL jobs are done executing: - CLScheduler::get().sync(); - } - void do_teardown() override - { - // Save the result to file: - if(!output_filename.empty()) - { - save_to_ppm(dst, output_filename); // save_to_ppm maps and unmaps the image to store as PPM - } - } - -private: - CLImage src{}, scale_median{}, median_gauss{}, dst{}; - CLScale scale{}; - NEMedian3x3 median{}; - CLGaussian5x5 gauss{}; - std::string output_filename{}; -}; - -/** Main program for neon/cl scale median gaussian test - * - * @param[in] argc Number of arguments - * @param[in] argv Arguments ( [optional] Path to PPM image to process ) - */ -int main(int argc, char **argv) -{ - return utils::run_example<NEONCLScaleMedianGaussianExample>(argc, argv); -}
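The deleted neoncl_scale_median_gaussian.cpp was the last example mixing OpenCL and CPU functions in one pipeline. Its legacy CV functions (CLScale with a bare BorderMode, NEMedian3x3, CLGaussian5x5) are gone, but the interop rule it demonstrated still holds for CLTensor: map the buffer before any CPU code touches it, unmap it before OpenCL uses it again, and sync the scheduler before reading results. Schematically (cl_producer, cpu_filter and cl_consumer are placeholders, not current API names):

    cl_producer.run();         // enqueue OpenCL work writing cl_tensor
    cl_tensor.map();           // blocking map: make the buffer visible to the CPU
    cpu_filter.run();          // CPU function reads/writes the mapped buffer
    cl_tensor.unmap();         // hand the buffer back to OpenCL before...
    cl_consumer.run();         // ...the next OpenCL function consumes it
    CLScheduler::get().sync(); // wait for all queued OpenCL jobs to finish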