aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2021-09-14 12:33:34 +0100
committerFreddie Liardet <frederick.liardet@arm.com>2021-10-07 10:59:05 +0000
commitb6af482bc5d8e4f03f876e17909c561de198c4d3 (patch)
treef32c3a796cad01ffc27a4da2e8141cdf451ca453
parent58e9e06102da7042bed34482ae89b3a6f8c77dca (diff)
downloadComputeLibrary-b6af482bc5d8e4f03f876e17909c561de198c4d3.tar.gz
Per-operator build dependencies
Creates a list of operators and their respective dependencies. Alters the build system to walk through them, resolve the dependencies and build Compute Library. Removes the following unused kernels/functions: -[NE|CL]MinMaxLayerKernel -CLFillBorder Resolves: COMPMID-4695,COMPMID-4696 Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com> Change-Id: I35ebeef38dac25ec5459cfe9c5f7c9a708621124 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/VisualCompute/ComputeLibrary/+/357914 Tested-by: bsgcomp <bsgcomp@arm.com> Reviewed-by: Michele DiGiorgio <michele.digiorgio@arm.com> Comments-Addressed: bsgcomp <bsgcomp@arm.com> Signed-off-by: Freddie Liardet <frederick.liardet@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6295 Reviewed-by: Gunes Bayir <gunes.bayir@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--Android.bp3
-rw-r--r--SConscript305
-rw-r--r--SConstruct12
-rw-r--r--arm_compute/runtime/CL/CLFunctions.h1
-rw-r--r--arm_compute/runtime/CL/functions/CLFillBorder.h67
-rw-r--r--docs/user_guide/library.dox29
-rw-r--r--docs/user_guide/operator_list.dox17
-rw-r--r--docs/user_guide/release_version_and_change_log.dox4
-rw-r--r--filelist.json2551
-rw-r--r--src/core/CL/CLKernels.h1
-rw-r--r--src/core/CL/kernels/CLMinMaxLayerKernel.cpp169
-rw-r--r--src/core/CL/kernels/CLMinMaxLayerKernel.h87
-rw-r--r--src/core/NEON/NEKernels.h1
-rw-r--r--src/core/NEON/kernels/NEMinMaxLayerKernel.cpp224
-rw-r--r--src/core/NEON/kernels/NEMinMaxLayerKernel.h90
-rw-r--r--src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp2
-rw-r--r--src/runtime/CL/functions/CLFillBorder.cpp45
-rw-r--r--tests/framework/instruments/OpenCLTimer.cpp45
-rw-r--r--tests/framework/instruments/OpenCLTimer.h10
-rw-r--r--tests/framework/instruments/SchedulerTimer.cpp34
-rw-r--r--tests/framework/instruments/SchedulerTimer.h16
21 files changed, 1636 insertions, 2077 deletions
diff --git a/Android.bp b/Android.bp
index 9b6808eb9a..8b73de5f2f 100644
--- a/Android.bp
+++ b/Android.bp
@@ -226,7 +226,6 @@ cc_library_static {
"src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp",
"src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp",
"src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp",
- "src/core/CL/kernels/CLMinMaxLayerKernel.cpp",
"src/core/CL/kernels/CLNormalizationLayerKernel.cpp",
"src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp",
"src/core/CL/kernels/CLPadLayerKernel.cpp",
@@ -280,7 +279,6 @@ cc_library_static {
"src/core/NEON/kernels/NELogicalKernel.cpp",
"src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp",
"src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp",
- "src/core/NEON/kernels/NEMinMaxLayerKernel.cpp",
"src/core/NEON/kernels/NENormalizationLayerKernel.cpp",
"src/core/NEON/kernels/NEPadLayerKernel.cpp",
"src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp",
@@ -639,7 +637,6 @@ cc_library_static {
"src/runtime/CL/functions/CLFFT2D.cpp",
"src/runtime/CL/functions/CLFFTConvolutionLayer.cpp",
"src/runtime/CL/functions/CLFill.cpp",
- "src/runtime/CL/functions/CLFillBorder.cpp",
"src/runtime/CL/functions/CLFlattenLayer.cpp",
"src/runtime/CL/functions/CLFloor.cpp",
"src/runtime/CL/functions/CLFullyConnectedLayer.cpp",
diff --git a/SConscript b/SConscript
index df8f33a917..c88a86773c 100644
--- a/SConscript
+++ b/SConscript
@@ -38,27 +38,27 @@ Import('vars')
Import('install_lib')
def build_bootcode_objs(sources):
-
arm_compute_env.Append(ASFLAGS = "-I bootcode/")
obj = arm_compute_env.Object(sources)
obj = install_lib(obj)
Default(obj)
return obj
-def build_sve_objs(sources):
+def build_sve_objs(sources):
tmp_env = arm_compute_env.Clone()
tmp_env.Append(CXXFLAGS = "-march=armv8.2-a+sve+fp16")
obj = tmp_env.SharedObject(sources)
Default(obj)
return obj
-def build_objs(sources):
+def build_objs(sources):
obj = arm_compute_env.SharedObject(sources)
Default(obj)
return obj
+
def build_library(name, build_env, sources, static=False, libs=[]):
if static:
obj = build_env.StaticLibrary(name, source=sources, LIBS = arm_compute_env["LIBS"] + libs)
@@ -72,6 +72,7 @@ def build_library(name, build_env, sources, static=False, libs=[]):
Default(obj)
return obj
+
def remove_incode_comments(code):
def replace_with_empty(match):
s = match.group(0)
@@ -83,6 +84,7 @@ def remove_incode_comments(code):
comment_regex = re.compile(r'//.*?$|/\*.*?\*/|\'(?:\\.|[^\\\'])*\'|"(?:\\.|[^\\"])*"', re.DOTALL | re.MULTILINE)
return re.sub(comment_regex, replace_with_empty, code)
+
def resolve_includes(target, source, env):
# File collection
FileEntry = collections.namedtuple('FileEntry', 'target_name file_contents')
@@ -142,6 +144,7 @@ def resolve_includes(target, source, env):
file_to_write = "R\"(" + file_to_write + ")\""
out_file.write(file_to_write)
+
def create_version_file(target, source, env):
# Generate string with build options library version to embed in the library:
try:
@@ -153,72 +156,87 @@ def create_version_file(target, source, env):
with open(target[0].get_path(), "w") as fd:
fd.write(build_info)
-def get_cpu_runtime_files(operator):
- file_list = []
- operators = filelist['cpu']['operators']
-
- if "operator" in operators[operator]["files"]:
- file_list += operators[operator]["files"]["operator"]
- return file_list
-
-def get_gpu_runtime_files(operator):
- file_list = []
- operators = filelist['gpu']['operators']
-
- if "operator" in operators[operator]["files"]:
- file_list += operators[operator]["files"]["operator"]
- return file_list
-
-def get_cpu_kernel_files(operator):
-
- file_list = []
- file_list_sve = []
- operators = filelist['cpu']['operators']
-
- if env['estate'] == '64' and "neon" in operators[operator]['files'] and "estate64" in operators[operator]['files']['neon']:
- file_list += operators[operator]['files']['neon']['estate64']
- if env['estate'] == '32' and "neon" in operators[operator]['files'] and "estate32" in operators[operator]['files']['neon']:
- file_list += operators[operator]['files']['neon']['estate32']
-
- if "kernel" in operators[operator]["files"]:
- file_list += operators[operator]["files"]["kernel"]
-
- if ("neon" in operators[operator]["files"]):
- if any(i in env['data_type_support'] for i in ['all', 'qasymm8']) and ("qasymm8" in operators[operator]["files"]["neon"]):
- file_list += operators[operator]["files"]["neon"]["qasymm8"]
- if any(i in env['data_type_support'] for i in ['all', 'qasymm8_signed']) and ("qasymm8_signed" in operators[operator]["files"]["neon"]):
- file_list += operators[operator]["files"]["neon"]["qasymm8_signed"]
- if any(i in env['data_type_support'] for i in ['all', 'qsymm16']) and ("qsymm16" in operators[operator]["files"]["neon"]):
- file_list += operators[operator]["files"]["neon"]["qsymm16"]
- if any(i in env['data_type_support'] for i in ['all', 'integer']) and ("integer" in operators[operator]["files"]["neon"]):
- file_list += operators[operator]["files"]["neon"]["integer"]
-
- if (not "sve" in env['arch'] or env['fat_binary']) and ("neon" in operators[operator]["files"]):
- if any(i in env['data_type_support'] for i in ['all', 'fp16']) and ("fp16" in operators[operator]["files"]["neon"]):
- file_list += operators[operator]["files"]["neon"]["fp16"]
- if any(i in env['data_type_support'] for i in ['all', 'fp32']) and ("fp32" in operators[operator]["files"]["neon"]):
- file_list += operators[operator]["files"]["neon"]["fp32"]
- if any(i in env['data_layout_support'] for i in ['all', 'nchw']) and ("nchw" in operators[operator]["files"]["neon"]):
- file_list += operators[operator]['files']['neon']['nchw']
- if ("all" in operators[operator]["files"]["neon"]):
- file_list += operators[operator]["files"]["neon"]["all"]
- if ("sve" in env['arch'] or env['fat_binary']) and ("sve" in operators[operator]["files"]):
- if any(i in env['data_type_support'] for i in ['all', 'fp16']) and ("fp16" in operators[operator]["files"]["sve"]):
- file_list_sve += operators[operator]["files"]["sve"]["fp16"]
- if any(i in env['data_type_support'] for i in ['all', 'fp32']) and ("fp32" in operators[operator]["files"]["sve"]):
- file_list_sve += operators[operator]["files"]["sve"]["fp32"]
- if any(i in env['data_type_support'] for i in ['all', 'qasymm8']) and ("qasymm8" in operators[operator]["files"]["sve"]):
- file_list_sve += operators[operator]["files"]["sve"]["qasymm8"]
- if any(i in env['data_type_support'] for i in ['all', 'qasymm8_signed']) and ("qasymm8_signed" in operators[operator]["files"]["sve"]):
- file_list_sve += operators[operator]["files"]["sve"]["qasymm8_signed"]
- if any(i in env['data_type_support'] for i in ['all', 'qsymm16']) and ("qsymm16" in operators[operator]["files"]["sve"]):
- file_list_sve += operators[operator]["files"]["sve"]["qsymm16"]
- if any(i in env['data_type_support'] for i in ['all', 'integer']) and ("integer" in operators[operator]["files"]["sve"]):
- file_list_sve += operators[operator]["files"]["sve"]["integer"]
- if ("all" in operators[operator]["files"]["sve"]):
- file_list_sve += operators[operator]["files"]["sve"]["all"]
-
- return file_list, file_list_sve
+
+def get_attrs_list(arch, estate, data_types, data_layouts):
+ attrs = []
+
+ # Manage data-types
+ if any(i in data_types for i in ['all']):
+ attrs += ['fp16', 'fp32', 'integer', 'qasymm8', 'qasymm8_signed', 'qsymm16']
+ else:
+ if any(i in data_types for i in ['fp16']): attrs += ['fp16']
+ if any(i in data_types for i in ['fp32']): attrs += ['fp32']
+ if any(i in data_types for i in ['integer']): attrs += ['integer']
+ if any(i in data_types for i in ['qasymm8']): attrs += ['qasymm8']
+ if any(i in data_types for i in ['qasymm8_signed']): attrs += ['qasymm8_signed']
+ if any(i in data_types for i in ['qsymm16']): attrs += ['qsymm16']
+
+ # Manage data-layouts
+ if any(i in data_layouts for i in ['all']):
+ attrs += ['nhwc', 'nchw']
+ else:
+ if any(i in data_layouts for i in ['nhwc']): attrs += ['nhwc']
+ if any(i in data_layouts for i in ['nchw']): attrs += ['nchw']
+
+ # Manage execution state
+ estate_attr = 'estate32' if (estate == 'auto' and 'v7a' in arch) or '32' in estate else 'estate64'
+ attrs += [ estate_attr ]
+
+ return attrs
+
+
+def get_operator_backend_files(filelist, operators, backend='', techs=[], attrs=[]):
+ files = { "common" : [] }
+
+ # Early return if filelist is empty
+ if backend not in filelist:
+ return files
+
+ # Iterate over operators and create the file lists to compile
+ for operator in operators:
+ if operator in filelist[backend]['operators']:
+ files['common'] += filelist[backend]['operators'][operator]["files"]["common"]
+ for tech in techs:
+ if tech in filelist[backend]['operators'][operator]["files"]:
+ # Add tech as a key to dictionary if not there
+ if tech not in files:
+ files[tech] = []
+
+ # Add tech files to the tech file list
+ tech_files = filelist[backend]['operators'][operator]["files"][tech]
+ files[tech] += tech_files.get('common', [])
+ for attr in attrs:
+ files[tech] += tech_files.get(attr, [])
+
+ # Remove duplicates if they exist
+ return {k: list(set(v)) for k,v in files.items()}
+
+def collect_operators(filelist, operators, backend=''):
+ ops = set()
+ for operator in operators:
+ if operator in filelist[backend]['operators']:
+ ops.add(operator)
+ if 'deps' in filelist[backend]['operators'][operator]:
+ ops.update(filelist[backend]['operators'][operator]['deps'])
+ else:
+ print("Operator {0} is unsupported on {1} backend!".format(operator, backend))
+
+ return ops
+
+
+def resolve_operator_dependencies(filelist, operators, backend=''):
+ resolved_operators = collect_operators(filelist, operators, backend)
+
+ are_ops_resolved = False
+ while not are_ops_resolved:
+ resolution_pass = collect_operators(filelist, resolved_operators, backend)
+ if len(resolution_pass) != len(resolved_operators):
+ resolved_operators.update(resolution_pass)
+ else:
+ are_ops_resolved = True
+
+ return resolved_operators
+
arm_compute_env = env.Clone()
version_file = arm_compute_env.Command("src/core/arm_compute_version.embed", "", action=create_version_file)
@@ -385,70 +403,61 @@ arm_compute_env.Append(LIBS = ['dl'])
with (open(Dir('#').path + '/filelist.json')) as fp:
filelist = json.load(fp)
-core_files = Glob('src/core/*.cpp')
-core_files += Glob('src/core/CPP/*.cpp')
-core_files += Glob('src/core/CPP/kernels/*.cpp')
-core_files += Glob('src/core/helpers/*.cpp')
-core_files += Glob('src/core/utils/*.cpp')
-core_files += Glob('src/core/utils/helpers/*.cpp')
-core_files += Glob('src/core/utils/io/*.cpp')
-core_files += Glob('src/core/utils/quantization/*.cpp')
-core_files += Glob('src/core/utils/misc/*.cpp')
-if env["logging"]:
- core_files += Glob('src/core/utils/logging/*.cpp')
+# Common backend files
+lib_files = filelist['common']
-runtime_files_hp = Glob('src/runtime/*.cpp')
-runtime_files_hp += Glob('src/runtime/CPP/ICPPSimpleFunction.cpp')
-runtime_files = Glob('src/runtime/CPP/functions/*.cpp')
+# Logging files
+if env["logging"]:
+ lib_files += filelist['logging']
# C API files
-runtime_files_hp += filelist['c_api']['common']
-runtime_files_hp += filelist['c_api']['operators']
+lib_files += filelist['c_api']['common']
+lib_files += filelist['c_api']['operators']
-if env['opencl']:
- runtime_files_hp += filelist['c_api']['gpu']
-
-# Common backend files
-core_files += filelist['common']
-
-# Initialize high priority core files
-core_files_hp = core_files
-core_files_sve_hp = []
-core_files = []
-
-runtime_files_hp += Glob('src/runtime/CPP/SingleThreadScheduler.cpp')
+# Scheduler infrastructure
+lib_files += filelist['scheduler']['single']
+if env['cppthreads']:
+ lib_files += filelist['scheduler']['threads']
+if env['openmp']:
+ lib_files += filelist['scheduler']['omp']
+# Graph files
graph_files = Glob('src/graph/*.cpp')
graph_files += Glob('src/graph/*/*.cpp')
-if env['cppthreads']:
- runtime_files_hp += Glob('src/runtime/CPP/CPPScheduler.cpp')
-
-if env['openmp']:
- runtime_files_hp += Glob('src/runtime/OMP/OMPScheduler.cpp')
+# Specify user-defined priority operators
+use_priority_ops = env['high_priority']
+priority_operators = filelist['high_priority']
+if env['build_config'] != "":
+ build_config = env['build_config']
+ build_config_contents = {}
+ if os.path.isfile(build_config):
+ with open(build_config) as f:
+ try:
+ build_config_contents = json.load(f)
+ except:
+ print("Warning: Build configuration file is of invalid JSON format!")
+ else:
+ try:
+ build_config_contents = json.loads(build_config)
+ except:
+ print("Warning: Build configuration string is of invalid JSON format!")
+ if build_config_contents:
+ priority_operators = build_config_contents.get("operators", [])
if env['opencl']:
- operators = filelist['gpu']['operators']
- for operator in operators:
- if operator in filelist['gpu']['high_priority']:
- runtime_files_hp += get_gpu_runtime_files(operator)
- if "kernel" in operators[operator]["files"]:
- core_files_hp += operators[operator]["files"]["kernel"]
- else:
- runtime_files += get_gpu_runtime_files(operator)
- if "kernel" in operators[operator]["files"]:
- core_files += operators[operator]["files"]["kernel"]
+ lib_files += filelist['c_api']['gpu']
+ lib_files += filelist['gpu']['common']
- runtime_files_hp += filelist['gpu']['common']
- runtime_files += Glob('src/runtime/CL/functions/*.cpp')
+ cl_operators = priority_operators if use_priority_ops else filelist['gpu']['operators'].keys()
+ cl_ops_to_build = resolve_operator_dependencies(filelist, cl_operators, 'gpu')
+ lib_files += get_operator_backend_files(filelist, cl_ops_to_build, 'gpu')['common']
graph_files += Glob('src/graph/backends/CL/*.cpp')
sve_o = []
-core_files_sve = []
+lib_files_sve = []
if env['neon']:
- core_files += Glob('src/core/NEON/*.cpp')
-
# build winograd/depthwise sources for either v7a / v8a
arm_compute_env.Append(CPPPATH = ["src/core/NEON/kernels/convolution/common/",
"src/core/NEON/kernels/convolution/winograd/",
@@ -457,58 +466,55 @@ if env['neon']:
"arm_compute/core/NEON/kernels/assembly/",
"src/cpu/kernels/assembly/",])
- # Load files based on user's options
- operators = filelist['cpu']['operators']
- for operator in operators:
- if operator in filelist['cpu']['high_priority']:
- runtime_files_hp += get_cpu_runtime_files(operator)
- file_list, file_list_sve = get_cpu_kernel_files(operator)
- core_files_hp += file_list
- core_files_sve_hp += file_list_sve
- else:
- runtime_files += get_cpu_runtime_files(operator)
- file_list, file_list_sve = get_cpu_kernel_files(operator)
- core_files += file_list
- core_files_sve += file_list_sve
+ lib_files += filelist['cpu']['common']
+
+ # Setup SIMD file list to include
+ simd = []
+ if 'sve' in env['arch'] or env['fat_binary']: simd += ['sve']
+ if 'sve' not in env['arch'] or env['fat_binary']: simd += ['neon']
- runtime_files_hp += filelist['cpu']['common']
- runtime_files_hp += Glob('src/runtime/NEON/*.cpp')
- runtime_files += Glob('src/runtime/NEON/functions/*.cpp')
+ # Get attributes
+ attrs = get_attrs_list(env['arch'], env['estate'], env['data_type_support'], env['data_layout_support'])
+
+ # Setup data-type and data-layout files to include
+ cpu_operators = priority_operators if use_priority_ops else filelist['cpu']['operators'].keys()
+ cpu_ops_to_build = resolve_operator_dependencies(filelist, cpu_operators, 'cpu')
+ cpu_files = get_operator_backend_files(filelist, cpu_ops_to_build, 'cpu', simd, attrs)
+ lib_files += cpu_files.get('common', [])
+ lib_files += cpu_files.get('neon', [])
+ lib_files_sve += cpu_files.get('sve', [])
graph_files += Glob('src/graph/backends/NEON/*.cpp')
+# Restrict from building graph API if a reduced operator list has been provided
+if use_priority_ops:
+ print("Graph library requires all operators to be built")
+ graph_files = []
+
+# Build bootcode in case of bare-metal
bootcode_o = []
if env['os'] == 'bare_metal':
bootcode_files = Glob('bootcode/*.s')
bootcode_o = build_bootcode_objs(bootcode_files)
Export('bootcode_o')
-high_priority_o = build_objs(core_files_hp + runtime_files_hp)
-high_priority_sve_o = []
+# Build static libraries
if (env['fat_binary']):
- sve_o = build_sve_objs(core_files_sve)
- high_priority_sve_o = build_sve_objs(core_files_sve_hp)
- arm_compute_a = build_library('arm_compute-static', arm_compute_env, core_files + sve_o + high_priority_o + high_priority_sve_o + runtime_files, static=True)
+ sve_o = build_sve_objs(lib_files_sve)
+ arm_compute_a = build_library('arm_compute-static', arm_compute_env, lib_files + sve_o, static=True)
else:
- high_priority_o += build_objs(core_files_sve_hp)
- arm_compute_a = build_library('arm_compute-static', arm_compute_env, core_files + core_files_sve + high_priority_o + runtime_files, static=True)
+ arm_compute_a = build_library('arm_compute-static', arm_compute_env, lib_files + lib_files_sve, static=True)
Export('arm_compute_a')
-if env['high_priority']:
- arm_compute_hp_a = build_library('arm_compute_hp-static', arm_compute_env, high_priority_o + high_priority_sve_o, static=True)
- Export('arm_compute_hp_a')
+# Build shared libraries
if env['os'] != 'bare_metal' and not env['standalone']:
if (env['fat_binary']):
- arm_compute_so = build_library('arm_compute', arm_compute_env, core_files + sve_o + high_priority_sve_o + high_priority_o + runtime_files, static=False)
+ arm_compute_so = build_library('arm_compute', arm_compute_env, lib_files + sve_o, static=False)
else:
- arm_compute_so = build_library('arm_compute', arm_compute_env, core_files + core_files_sve + high_priority_o + runtime_files , static=False)
+ arm_compute_so = build_library('arm_compute', arm_compute_env, lib_files + lib_files_sve, static=False)
Export('arm_compute_so')
- if env['high_priority']:
- arm_compute_hp_so = build_library('arm_compute_hp', arm_compute_env, high_priority_sve_o + high_priority_o, static=False)
- Export('arm_compute_hp_so')
-
# Generate dummy core lib for backwards compatibility
arm_compute_core_a = build_library('arm_compute_core-static', arm_compute_env, [], static=True)
Export('arm_compute_core_a')
@@ -519,6 +525,7 @@ if env['os'] != 'bare_metal' and not env['standalone']:
arm_compute_graph_env = arm_compute_env.Clone()
+# Build graph libraries
arm_compute_graph_env.Append(CXXFLAGS = ['-Wno-redundant-move', '-Wno-pessimizing-move'])
arm_compute_graph_a = build_library('arm_compute_graph-static', arm_compute_graph_env, graph_files, static=True, libs = [ arm_compute_a])
diff --git a/SConstruct b/SConstruct
index ee8108bf71..7591075cd1 100644
--- a/SConstruct
+++ b/SConstruct
@@ -23,8 +23,10 @@
# SOFTWARE.
import SCons
+import json
import os
import subprocess
+import sys
def version_at_least(version, required):
@@ -76,7 +78,8 @@ vars.AddVariables(
("extra_cxx_flags", "Extra CXX flags to be appended to the build command", ""),
("extra_link_flags", "Extra LD flags to be appended to the build command", ""),
("compiler_cache", "Command to prefix to the C and C++ compiler (e.g ccache)", ""),
- ("specs_file", "Specs file to use (e.g. rdimon.specs)", "")
+ ("specs_file", "Specs file to use (e.g. rdimon.specs)", ""),
+ ("build_config", "Operator/Data-type/Data-layout configuration to use for tailored ComputeLibrary builds. Can be a JSON file or a JSON formatted string", "")
)
env = Environment(platform="posix", variables=vars, ENV = os.environ)
@@ -317,6 +320,13 @@ if env['fat_binary']:
'-DARM_COMPUTE_ENABLE_FP16', '-DARM_COMPUTE_ENABLE_BF16',
'-DARM_COMPUTE_ENABLE_I8MM', '-DARM_COMPUTE_ENABLE_SVEF32MM'])
+if env['high_priority'] and env['build_config']:
+ print("The high priority library cannot be built in conjunction with a user-specified build configuration")
+ Exit(1)
+
+if not env['high_priority'] and not env['build_config']:
+ env.Append(CPPDEFINES = ['ARM_COMPUTE_GRAPH_ENABLED'])
+
if env['data_type_support']:
if any(i in env['data_type_support'] for i in ['all', 'fp16']):
env.Append(CXXFLAGS = ['-DENABLE_FP16_KERNELS'])
diff --git a/arm_compute/runtime/CL/CLFunctions.h b/arm_compute/runtime/CL/CLFunctions.h
index 62c94152e8..442d407660 100644
--- a/arm_compute/runtime/CL/CLFunctions.h
+++ b/arm_compute/runtime/CL/CLFunctions.h
@@ -57,7 +57,6 @@
#include "arm_compute/runtime/CL/functions/CLFFT2D.h"
#include "arm_compute/runtime/CL/functions/CLFFTConvolutionLayer.h"
#include "arm_compute/runtime/CL/functions/CLFill.h"
-#include "arm_compute/runtime/CL/functions/CLFillBorder.h"
#include "arm_compute/runtime/CL/functions/CLFlattenLayer.h"
#include "arm_compute/runtime/CL/functions/CLFloor.h"
#include "arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h"
diff --git a/arm_compute/runtime/CL/functions/CLFillBorder.h b/arm_compute/runtime/CL/functions/CLFillBorder.h
deleted file mode 100644
index 20f2e15b72..0000000000
--- a/arm_compute/runtime/CL/functions/CLFillBorder.h
+++ /dev/null
@@ -1,67 +0,0 @@
-/*
- * Copyright (c) 2016-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLFILLBORDER_H
-#define ARM_COMPUTE_CLFILLBORDER_H
-
-#include "arm_compute/core/PixelValue.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/runtime/CL/ICLSimpleFunction.h"
-
-namespace arm_compute
-{
-class CLCompileContext;
-class ICLTensor;
-
-/** Basic function to run @ref CLFillBorderKernel */
-class CLFillBorder : public ICLSimpleFunction
-{
-public:
- /** Initialize the function
- *
- * Valid data layouts:
- * - All
- *
- * Valid data type configurations:
- * |src |dst |
- * |:--------------|:--------------|
- * |All |All |
- *
- * @param[in,out] tensor Source tensor. Data types supported: U8/QASYMM8/S8/QASYMM8_SIGNED/U16/S16/U32/S32/F16/F32.
- * @param[in] border_width The border width
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(ICLTensor *tensor, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
- /** Initialize the function
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in,out] tensor Source tensor. Data types supported: U8/QASYMM8/S8/QASYMM8_SIGNED/U16/S16/U32/S32/F16/F32.
- * @param[in] border_width The border width
- * @param[in] border_mode Strategy to use for borders.
- * @param[in] constant_border_value (Optional) Constant value to use for borders if border_mode is set to CONSTANT.
- */
- void configure(const CLCompileContext &compile_context, ICLTensor *tensor, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value = PixelValue());
-};
-}
-#endif /*ARM_COMPUTE_FILLBORDER_H */
diff --git a/docs/user_guide/library.dox b/docs/user_guide/library.dox
index 6c7b7e941f..fc08dbc437 100644
--- a/docs/user_guide/library.dox
+++ b/docs/user_guide/library.dox
@@ -561,6 +561,35 @@ Selecting fat_binary when building Compute Library, will create a library that c
Based on the CPU support, the appropriate kernel will be selected at runtime for execution. Currently this option is
only supported with armv8.2-a as the base architecture.
+@subsection architecture_experimental_per_operator_build Per-operator build
+
+Dependencies for all operators have been explicitly defined; this provides users with the ability to generate Compute Library
+binaries that include a user-defined list of operators.
+
+An experimental flag 'build_config' has been introduced where a JSON configuration file can be provided and consumed.
+An example config looks like:
+@code{.py}
+{
+ "operators": [
+ "Activation",
+ "DepthwiseConv2d",
+ "Conv2d",
+ "Permute",
+ "Pool2d",
+ "Reshape"
+ ],
+ "data_layouts": [
+ "NHWC"
+ ]
+}
+@endcode
+
+Supported data-layout options are:
+- "NHWC"
+- "NCHW"
+
+The list of supported operators can be found in filelist.json in the root of Compute Library repo.
+
@subsection architecture_experimental_build_high_priority_operators Build high priority operators
Selecting high_priority when building Compute Library, one new library will be created: libarm_compute_hp and
diff --git a/docs/user_guide/operator_list.dox b/docs/user_guide/operator_list.dox
index 92b8f9b482..27ba52d72e 100644
--- a/docs/user_guide/operator_list.dox
+++ b/docs/user_guide/operator_list.dox
@@ -1404,9 +1404,9 @@ where N = batches, C = channels, H = height, W = width
<tr><td>All<td>All
</table>
<tr>
- <td rowspan="2">FillBorder
- <td rowspan="2" style="width:200px;"> Function to fill the borders within the XY-planes.
- <td rowspan="2">
+ <td rowspan="1">FillBorder
+ <td rowspan="1" style="width:200px;"> Function to fill the borders within the XY-planes.
+ <td rowspan="1">
<ul>
<li>n/a
</ul>
@@ -1421,17 +1421,6 @@ where N = batches, C = channels, H = height, W = width
<tr><td>All<td>All
</table>
<tr>
- <td>CLFillBorder
- <td>
- <ul>
- <li>All
- </ul>
- <td>
- <table>
- <tr><th>src<th>dst
- <tr><td>All<td>All
- </table>
-<tr>
<td rowspan="2">FlattenLayer
<td rowspan="2" style="width:200px;"> Reshape a tensor to be 1D
<td rowspan="2">
diff --git a/docs/user_guide/release_version_and_change_log.dox b/docs/user_guide/release_version_and_change_log.dox
index 2eb9aacce7..583cf4fb82 100644
--- a/docs/user_guide/release_version_and_change_log.dox
+++ b/docs/user_guide/release_version_and_change_log.dox
@@ -1315,7 +1315,7 @@ v17.09 Public major release
- NEDequantizationLayerKernel / @ref NEDequantizationLayer
- NEFloorKernel / @ref NEFloor
- @ref NEL2NormalizeLayerKernel / @ref NEL2NormalizeLayer
- - NEQuantizationLayerKernel @ref NEMinMaxLayerKernel / @ref NEQuantizationLayer
+ - NEQuantizationLayerKernel NEMinMaxLayerKernel / @ref NEQuantizationLayer
- @ref NEROIPoolingLayerKernel / @ref NEROIPoolingLayer
- @ref NEReductionOperationKernel / @ref NEReductionOperation
- NEReshapeLayerKernel / @ref NEReshapeLayer
@@ -1329,7 +1329,7 @@ v17.09 Public major release
- CLGEMMTranspose1xW
- CLGEMMMatrixVectorMultiplyKernel
- @ref CLL2NormalizeLayerKernel / @ref CLL2NormalizeLayer
- - CLQuantizationLayerKernel @ref CLMinMaxLayerKernel / @ref CLQuantizationLayer
+ - CLQuantizationLayerKernel CLMinMaxLayerKernel / @ref CLQuantizationLayer
- @ref CLROIPoolingLayerKernel / @ref CLROIPoolingLayer
- @ref CLReductionOperationKernel / @ref CLReductionOperation
- CLReshapeLayerKernel / @ref CLReshapeLayer
diff --git a/filelist.json b/filelist.json
index 5171f39e12..4b85408e3d 100644
--- a/filelist.json
+++ b/filelist.json
@@ -7,8 +7,80 @@
"src/common/AllocatorWrapper.cpp",
"src/common/ITensorV2.cpp",
"src/common/TensorPack.cpp",
- "src/common/IOperator.cpp"
+ "src/common/IOperator.cpp",
+ "src/core/AccessWindowAutoPadding.cpp",
+ "src/core/AccessWindowStatic.cpp",
+ "src/core/AccessWindowTranspose.cpp",
+ "src/core/Error.cpp",
+ "src/core/GPUTarget.cpp",
+ "src/core/Helpers.cpp",
+ "src/core/IAccessWindow.cpp",
+ "src/core/IKernel.cpp",
+ "src/core/ITensor.cpp",
+ "src/core/ITensorPack.cpp",
+ "src/core/Rounding.cpp",
+ "src/core/Size2D.cpp",
+ "src/core/SubTensorInfo.cpp",
+ "src/core/TensorInfo.cpp",
+ "src/core/Utils.cpp",
+ "src/core/Validate.cpp",
+ "src/core/Version.cpp",
+ "src/core/helpers/SoftmaxHelpers.cpp",
+ "src/core/helpers/WindowHelpers.cpp",
+ "src/core/utils/AssemblyUtils.cpp",
+ "src/core/utils/ScaleUtils.cpp",
+ "src/core/utils/helpers/fft.cpp",
+ "src/core/utils/helpers/tensor_transform.cpp",
+ "src/core/utils/io/FileHandler.cpp",
+ "src/core/utils/misc/MMappedFile.cpp",
+ "src/core/utils/quantization/AsymmHelpers.cpp",
+ "src/core/CPP/CPPTypes.cpp",
+ "src/core/CPP/kernels/CPPBoxWithNonMaximaSuppressionLimitKernel.cpp",
+ "src/core/CPP/kernels/CPPNonMaximumSuppressionKernel.cpp",
+ "src/core/CPP/kernels/CPPPermuteKernel.cpp",
+ "src/core/CPP/kernels/CPPTopKVKernel.cpp",
+ "src/core/CPP/kernels/CPPUpsampleKernel.cpp",
+ "src/runtime/Allocator.cpp",
+ "src/runtime/BlobLifetimeManager.cpp",
+ "src/runtime/BlobMemoryPool.cpp",
+ "src/runtime/ISimpleLifetimeManager.cpp",
+ "src/runtime/ITensorAllocator.cpp",
+ "src/runtime/IWeightsManager.cpp",
+ "src/runtime/IScheduler.cpp",
+ "src/runtime/Memory.cpp",
+ "src/runtime/MemoryManagerOnDemand.cpp",
+ "src/runtime/OffsetLifetimeManager.cpp",
+ "src/runtime/OffsetMemoryPool.cpp",
+ "src/runtime/OperatorTensor.cpp",
+ "src/runtime/PoolManager.cpp",
+ "src/runtime/RuntimeContext.cpp",
+ "src/runtime/Scheduler.cpp",
+ "src/runtime/SchedulerFactory.cpp",
+ "src/runtime/SchedulerUtils.cpp",
+ "src/runtime/SubTensor.cpp",
+ "src/runtime/Tensor.cpp",
+ "src/runtime/TensorAllocator.cpp",
+ "src/runtime/Utils.cpp",
+ "src/runtime/CPP/ICPPSimpleFunction.cpp",
+ "src/runtime/CPP/functions/CPPBoxWithNonMaximaSuppressionLimit.cpp",
+ "src/runtime/CPP/functions/CPPDetectionOutputLayer.cpp",
+ "src/runtime/CPP/functions/CPPDetectionPostProcessLayer.cpp",
+ "src/runtime/CPP/functions/CPPNonMaximumSuppression.cpp",
+ "src/runtime/CPP/functions/CPPPermute.cpp",
+ "src/runtime/CPP/functions/CPPTopKV.cpp",
+ "src/runtime/CPP/functions/CPPUpsample.cpp"
],
+ "logging": [
+ "src/core/utils/logging/FilePrinter.cpp",
+ "src/core/utils/logging/Helpers.cpp",
+ "src/core/utils/logging/Logger.cpp",
+ "src/core/utils/logging/LoggerRegistry.cpp"
+ ],
+ "scheduler": {
+ "single": [ "src/runtime/CPP/SingleThreadScheduler.cpp" ],
+ "threads": [ "src/runtime/CPP/CPPScheduler.cpp" ],
+ "omp": [ "src/runtime/OMP/OMPScheduler.cpp"]
+ },
"c_api": {
"common": [
"src/c/AclContext.cpp",
@@ -28,6 +100,14 @@
"src/c/operators/AclActivation.cpp"
]
},
+ "high_priority": [
+ "Activation",
+ "DepthwiseConv2d",
+ "Conv2d",
+ "Permute",
+ "Pool2d",
+ "Reshape"
+ ],
"gpu": {
"common": [
"src/core/CL/CLCompileContext.cpp",
@@ -41,19 +121,11 @@
"src/core/CL/ICLSimpleKernel.cpp",
"src/core/CL/ICLTensor.cpp",
"src/core/CL/OpenCL.cpp",
- "src/gpu/cl/ClKernelLibrary.cpp",
- "src/gpu/cl/kernels/gemm/ClGemmHelpers.cpp",
- "src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.cpp",
- "src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeMidgard.cpp",
- "src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeValhall.cpp",
- "src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedBifrost.cpp",
- "src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedValhall.cpp",
- "src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.cpp",
- "src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.cpp",
- "src/core/CL/kernels/CLFillBorderKernel.cpp",
"src/gpu/cl/ClContext.cpp",
+ "src/gpu/cl/ClKernelLibrary.cpp",
"src/gpu/cl/ClQueue.cpp",
"src/gpu/cl/ClTensor.cpp",
+ "src/core/CL/kernels/CLFillBorderKernel.cpp",
"src/runtime/CL/CLBufferAllocator.cpp",
"src/runtime/CL/CLGEMMHeuristicsHandle.cpp",
"src/runtime/CL/CLHelpers.cpp",
@@ -68,888 +140,1022 @@
"src/runtime/CL/CLTuner.cpp",
"src/runtime/CL/ICLSimpleFunction.cpp",
"src/runtime/CL/Utils.cpp",
- "src/runtime/CL/gemm/CLGEMMDefaultTypeBifrost.cpp",
- "src/runtime/CL/gemm/CLGEMMDefaultTypeMidgard.cpp",
- "src/runtime/CL/gemm/CLGEMMDefaultTypeValhall.cpp",
- "src/runtime/CL/gemm_auto_heuristics/CLGEMMAutoHeuristics.cpp",
"src/runtime/CL/mlgo/HeuristicTree.cpp",
"src/runtime/CL/mlgo/MLGOHeuristics.cpp",
"src/runtime/CL/mlgo/MLGOParser.cpp",
"src/runtime/CL/mlgo/Utils.cpp",
"src/runtime/CL/tuners/CLTuningParametersList.cpp"
],
- "high_priority": [
- "Activation",
- "DepthwiseConv2d",
- "DirectConv2d",
- "Permute",
- "Pool2d",
- "Reshape"
- ],
"operators": {
- "Activation": {
- "files": {
- "operator": [
- "src/gpu/cl/operators/ClActivation.cpp"
- ],
- "kernel": [
- "src/gpu/cl/kernels/ClActivationKernel.cpp"
- ]
- }
- },
- "Add": {
- "files": {
- "operator": [
- "src/gpu/cl/operators/ClAdd.cpp"
- ]
- }
- },
- "Cast": {
- "files": {
- "operator": [
- "src/gpu/cl/operators/ClCast.cpp"
- ],
- "kernel": [
- "src/gpu/cl/kernels/ClCastKernel.cpp"
- ]
- }
- },
- "Concatenate": {
- "files": {
- "operator": [
- "src/gpu/cl/operators/ClConcatenate.cpp"
- ],
- "kernel": [
- "src/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp",
- "src/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp",
- "src/gpu/cl/kernels/ClWidthConcatenateKernel.cpp",
- "src/gpu/cl/kernels/ClHeightConcatenateKernel.cpp",
- "src/gpu/cl/kernels/ClDepthConcatenateKernel.cpp",
- "src/gpu/cl/kernels/ClBatchConcatenateKernel.cpp"
- ]
- }
- },
- "DirectConv2d": {
- "files": {
- "operator": [
- "src/gpu/cl/operators/ClDirectConv2d.cpp"
- ],
- "kernel": [
- "src/gpu/cl/kernels/ClDirectConv2dKernel.cpp"
- ]
- }
- },
- "FullyConnected": {
- "deps": [
- "ClFlatten",
- "ClConvertFullyConnectedWeights",
- "ClGemm",
- "ClGemmLowpMatrixMultiplyCore",
- "ClTranspose"
- ],
- "files": {
- "operator": [
- "src/gpu/cl/operators/ClFullyConnected.cpp"
- ]
- }
- },
- "ConvertFullyConnectedWeights": {
- "files": {
- "operator": [
- "src/gpu/cl/operators/ClConvertFullyConnectedWeights.cpp"
- ],
- "kernel": [
- "src/gpu/cl/kernels/ClConvertFullyConnectedWeightsKernel.cpp"
- ]
- }
- },
- "Permute": {
- "files": {
- "operator": [
- "src/gpu/cl/operators/ClPermute.cpp"
- ],
- "kernel": [
- "src/gpu/cl/kernels/ClPermuteKernel.cpp"
- ]
- }
- },
- "Pool2d": {
- "files": {
- "operator": [
- "src/gpu/cl/operators/ClPool2d.cpp"
- ],
- "kernel": [
- "src/gpu/cl/kernels/ClPool2dKernel.cpp"
- ]
- }
- },
- "Conv2d": {
- "files": {
- "operator": [
- "src/gpu/cl/operators/ClConv2d.cpp"
- ]
- }
- },
- "PRelu": {
- "files": {
- "operator": [
- "src/gpu/cl/operators/ClPRelu.cpp"
- ]
- }
- },
- "Reshape": {
- "files": {
- "operator": [
- "src/gpu/cl/operators/ClReshape.cpp"
- ],
- "kernel": [
- "src/gpu/cl/kernels/ClReshapeKernel.cpp"
- ]
- }
- },
- "Copy": {
- "files": {
- "operator": [
- "src/gpu/cl/operators/ClCopy.cpp"
- ],
- "kernel": [
- "src/gpu/cl/kernels/ClCopyKernel.cpp"
- ]
- }
- },
- "Crop": {
- "files": {
- "operator": [
- "src/gpu/cl/operators/ClCrop.cpp"
- ],
- "kernel": [
- "src/gpu/cl/kernels/ClCropKernel.cpp"
- ]
- }
- },
- "Dequantize": {
- "files": {
- "operator": [
- "src/gpu/cl/operators/ClDequantize.cpp"
- ],
- "kernel": [
- "src/gpu/cl/kernels/ClDequantizeKernel.cpp"
- ]
- }
- },
- "Elementwise": {
- "files": {
- "operator": [
- "src/gpu/cl/operators/ClElementwiseOperations.cpp"
- ],
- "kernel": [
- "src/gpu/cl/kernels/ClElementwiseKernel.cpp"
- ]
- }
- },
- "ElementwiseUnary": {
- "files": {
- "operator": [
- "src/gpu/cl/operators/ClElementwiseUnary.cpp"
- ],
- "kernel": [
- "src/gpu/cl/kernels/ClElementwiseUnaryKernel.cpp"
- ]
- }
- },
- "Fill": {
- "files": {
- "operator": [
- "src/gpu/cl/operators/ClFill.cpp"
- ],
- "kernel": [
- "src/gpu/cl/kernels/ClFillKernel.cpp"
- ]
- }
- },
- "Flatten": {
- "files": {
- "operator": [
- "src/gpu/cl/operators/ClFlatten.cpp"
- ]
- }
- },
- "Floor": {
- "files": {
- "operator": [
- "src/gpu/cl/operators/ClFloor.cpp"
- ],
- "kernel": [
- "src/gpu/cl/kernels/ClFloorKernel.cpp"
- ]
- }
- },
- "GEMM": {
- "files": {
- "operator": [
- "src/gpu/cl/operators/ClGemm.cpp",
- "src/gpu/cl/operators/ClGemmConv2d.cpp"
- ],
- "kernel": [
- "src/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.cpp",
- "src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.cpp",
- "src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.cpp",
- "src/gpu/cl/kernels/ClGemmMatrixMultiplyKernel.cpp",
- "src/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.cpp",
- "src/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.cpp"
- ]
- }
- },
- "GEMMLowp": {
- "files": {
- "operator": [
- "src/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.cpp",
- "src/gpu/cl/operators/ClGemmLowpOutputStage.cpp"
- ],
- "kernel": [
- "src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyNativeKernel.cpp",
- "src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedKernel.cpp",
- "src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel.cpp",
- "src/gpu/cl/kernels/ClGemmLowpOffsetContributionKernel.cpp",
- "src/gpu/cl/kernels/ClGemmLowpOffsetContributionOutputStageKernel.cpp",
- "src/gpu/cl/kernels/ClGemmLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp",
- "src/gpu/cl/kernels/ClGemmLowpQuantizeDownInt32ScaleByFloatKernel.cpp",
- "src/gpu/cl/kernels/ClGemmLowpQuantizeDownInt32ScaleKernel.cpp",
- "src/gpu/cl/kernels/ClGemmLowpReductionKernel.cpp"
- ]
- }
- },
- "Mul": {
- "files": {
- "operator": [
- "src/gpu/cl/operators/ClMul.cpp"
- ],
- "kernel": [
- "src/gpu/cl/kernels/ClMulKernel.cpp"
- ]
- }
- },
- "Quantize": {
- "files": {
- "operator": [
- "src/gpu/cl/operators/ClQuantize.cpp"
- ],
- "kernel": [
- "src/gpu/cl/kernels/ClQuantizeKernel.cpp"
- ]
- }
- },
- "Scale": {
- "files": {
- "operator": [
- "src/gpu/cl/operators/ClScale.cpp"
- ],
- "kernel": [
- "src/gpu/cl/kernels/ClScaleKernel.cpp"
- ]
- }
- },
- "Softmax": {
- "files": {
- "operator": [
- "src/gpu/cl/operators/ClSoftmax.cpp"
- ],
- "kernel": [
- "src/gpu/cl/kernels/ClSoftmaxKernel.cpp"
- ]
- }
- },
- "Sub": {
- "files": {
- "operator": [
- "src/gpu/cl/operators/ClSub.cpp"
- ]
- }
- },
- "Transpose": {
- "files": {
- "operator": [
- "src/gpu/cl/operators/ClTranspose.cpp"
- ],
- "kernel": [
- "src/gpu/cl/kernels/ClTransposeKernel.cpp"
- ]
- }
- },
- "GenerateProposals": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp"
- ]
- }
- },
- "ArgMinMax": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp"
- ]
- }
- },
- "BatchNormalization": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp"
- ]
- }
- },
- "BatchToSpace": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp"
- ]
- }
- },
- "Bitwise": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLBitwiseKernel.cpp"
- ]
- }
- },
- "BoundingBoxTransform": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp"
- ]
- }
- },
- "ChannelShuffleLayer": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp"
- ]
- }
- },
- "GEMMConv2d": {
- "files": {
- "kernel": [
- "src/gpu/cl/kernels/ClCol2ImKernel.cpp",
- "src/gpu/cl/kernels/ClIm2ColKernel.cpp"
- ]
- }
- },
- "Comparison": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLComparisonKernel.cpp"
- ]
- }
- },
- "DeconvolutionLayerUpsample": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp"
- ]
- }
- },
- "DeconvolutionReshapeOutput": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp"
- ]
- }
- },
- "DepthToSpace": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp"
- ]
- }
- },
- "DepthwiseConvolutionLayerNative": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp"
- ]
- }
- },
- "FFTDigitReverse": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLFFTDigitReverseKernel.cpp"
- ]
- }
- },
- "FFTRadixStage": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLFFTRadixStageKernel.cpp"
- ]
- }
- },
- "FFTScale": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLFFTScaleKernel.cpp"
- ]
- }
- },
- "FuseBatchNormalization": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp"
- ]
- }
- },
- "Gather": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLGatherKernel.cpp"
- ]
- }
- },
- "InstanceNormalization": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp"
- ]
- }
- },
- "L2Normalize": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp"
- ]
- }
- },
- "LogicalNot": {
- "files": {
- "operator": [
- "src/gpu/cl/operators/ClLogicalNot.cpp"
- ]
- }
- },
- "MaxUnpooling": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp"
- ]
- }
- },
- "MeanStdDevNormalization": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp"
- ]
- }
- },
- "MinMax": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLMinMaxLayerKernel.cpp"
- ]
- }
- },
- "Normalization": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLNormalizationLayerKernel.cpp"
- ]
- }
- },
- "NormalizePlanarYUV": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp"
- ]
- }
- },
- "Pad": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLPadLayerKernel.cpp"
- ]
- }
- },
- "PriorBox": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLPriorBoxLayerKernel.cpp"
- ]
- }
- },
- "QLSTMLayerNormalization": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp"
- ]
- }
- },
- "Range": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLRangeKernel.cpp"
- ]
- }
- },
- "ReductionOperation": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLReductionOperationKernel.cpp"
- ]
- }
- },
- "Remap": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLRemapKernel.cpp"
- ]
- }
- },
- "Reorg": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLReorgLayerKernel.cpp"
- ]
- }
- },
- "Reverse": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLReverseKernel.cpp"
- ]
- }
- },
- "ROIAlign": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLROIAlignLayerKernel.cpp"
- ]
- }
- },
- "ROIPooling": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLROIPoolingLayerKernel.cpp"
- ]
- }
- },
- "Select": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLSelectKernel.cpp"
- ]
- }
- },
- "SpaceToBatch": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp"
- ]
- }
- },
- "SpaceToDepth": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp"
- ]
- }
- },
- "Stack": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLStackLayerKernel.cpp"
- ]
- }
- },
- "StridedSlice": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLStridedSliceKernel.cpp"
- ]
- }
- },
- "Tile": {
- "files": {
- "kernel": [
- "src/core/CL/kernels/CLTileKernel.cpp"
- ]
- }
- },
- "WeightsReshape": {
- "files": {
- "kernel": [
- "src/gpu/cl/kernels/ClWeightsReshapeKernel.cpp"
- ]
- }
- },
- "WinogradConv2d": {
- "files": {
- "operator": [
- "src/gpu/cl/operators/ClWinogradConv2d.cpp"
- ],
- "kernel": [
- "src/gpu/cl/kernels/ClWinogradFilterTransformKernel.cpp",
- "src/gpu/cl/kernels/ClWinogradInputTransformKernel.cpp",
- "src/gpu/cl/kernels/ClWinogradOutputTransformKernel.cpp"
- ]
- }
+      "Activation": {
+ "files": {
+ "common": [
+ "src/gpu/cl/kernels/ClActivationKernel.cpp",
+ "src/gpu/cl/operators/ClActivation.cpp",
+ "src/runtime/CL/functions/CLActivationLayer.cpp"
+ ]
+ }
+ },
+ "ArgMinMax": {
+ "deps": [ "Reshape" ],
+ "files": {
+ "common": [
+ "src/core/CL/kernels/CLArgMinMaxLayerKernel.cpp",
+ "src/runtime/CL/functions/CLArgMinMaxLayer.cpp"
+ ]
+ }
+ },
+ "Add": {
+ "files": {
+ "common": [
+ "src/gpu/cl/kernels/ClElementwiseKernel.cpp",
+ "src/gpu/cl/operators/ClAdd.cpp"
+ ]
+ }
+ },
+ "BatchNormalization": {
+ "files": {
+ "common": [
+ "src/core/CL/kernels/CLBatchNormalizationLayerKernel.cpp",
+ "src/core/CL/kernels/CLFuseBatchNormalizationKernel.cpp",
+ "src/runtime/CL/functions/CLBatchNormalizationLayer.cpp",
+ "src/runtime/CL/functions/CLFuseBatchNormalization.cpp"
+ ]
+ }
+ },
+ "BatchToSpace": {
+ "files": {
+ "common": [
+ "src/core/CL/kernels/CLBatchToSpaceLayerKernel.cpp",
+ "src/runtime/CL/functions/CLBatchToSpaceLayer.cpp"
+ ]
+ }
+ },
+ "Bitwise": {
+ "files": {
+ "common": [ "src/core/CL/kernels/CLBitwiseKernel.cpp" ]
+ }
+ },
+ "BitwiseAnd": {
+ "deps": [ "Bitwise" ],
+ "files": {
+ "common": [ "src/runtime/CL/functions/CLBitwiseAnd.cpp" ]
+ }
+ },
+ "BitwiseNot": {
+ "deps": [ "Bitwise" ],
+ "files": {
+ "common": [ "src/runtime/CL/functions/CLBitwiseNot.cpp" ]
+ }
+ },
+ "BitwiseOr": {
+ "deps": [ "Bitwise" ],
+ "files": {
+ "common": [ "src/runtime/CL/functions/CLBitwiseOr.cpp" ]
+ }
+ },
+ "BitwiseXor": {
+ "deps": [ "Bitwise" ],
+ "files": {
+ "common": [ "src/runtime/CL/functions/CLBitwiseXor.cpp" ]
+ }
+ },
+ "BoundingBoxTransform": {
+ "files": {
+ "common": [
+ "src/core/CL/kernels/CLBoundingBoxTransformKernel.cpp",
+ "src/runtime/CL/functions/CLBoundingBoxTransform.cpp"
+ ]
+ }
+ },
+ "Cast": {
+ "files": {
+ "common": [
+ "src/gpu/cl/kernels/ClCastKernel.cpp",
+ "src/gpu/cl/operators/ClCast.cpp",
+ "src/runtime/CL/functions/CLCast.cpp"
+ ]
+ }
+ },
+ "ChannelShuffle": {
+ "files": {
+ "common": [
+ "src/core/CL/kernels/CLChannelShuffleLayerKernel.cpp",
+ "src/runtime/CL/functions/CLChannelShuffleLayer.cpp"
+ ]
+ }
+ },
+ "Comparison": {
+ "files": {
+ "common": [
+ "src/core/CL/kernels/CLComparisonKernel.cpp",
+ "src/runtime/CL/functions/CLComparison.cpp"
+ ]
+ }
+ },
+ "Concatenate": {
+ "files": {
+ "common": [
+ "src/gpu/cl/kernels/ClBatchConcatenateKernel.cpp",
+ "src/gpu/cl/kernels/ClDepthConcatenateKernel.cpp",
+ "src/gpu/cl/kernels/ClHeightConcatenateKernel.cpp",
+ "src/gpu/cl/kernels/ClWidthConcatenate2TensorsKernel.cpp",
+ "src/gpu/cl/kernels/ClWidthConcatenate4TensorsKernel.cpp",
+ "src/gpu/cl/kernels/ClWidthConcatenateKernel.cpp",
+ "src/gpu/cl/operators/ClConcatenate.cpp",
+ "src/runtime/CL/functions/CLConcatenateLayer.cpp"
+ ]
+ }
+ },
+ "Conv2d": {
+ "deps": [
+ "Activation",
+ "ElementwiseBinary",
+ "FFT2D",
+ "Gemm",
+ "Mul",
+ "Pad",
+ "Permute",
+ "Reduction",
+ "Reshape",
+ "Reverse",
+ "Slice"
+ ],
+ "files": {
+ "common": [
+ "src/gpu/cl/kernels/ClDirectConv2dKernel.cpp",
+ "src/gpu/cl/kernels/ClWinogradFilterTransformKernel.cpp",
+ "src/gpu/cl/kernels/ClWinogradInputTransformKernel.cpp",
+ "src/gpu/cl/kernels/ClWinogradOutputTransformKernel.cpp",
+ "src/gpu/cl/kernels/ClIm2ColKernel.cpp",
+ "src/gpu/cl/kernels/ClCol2ImKernel.cpp",
+ "src/gpu/cl/operators/ClConv2d.cpp",
+ "src/gpu/cl/operators/ClDirectConv2d.cpp",
+ "src/gpu/cl/operators/ClGemmConv2d.cpp",
+ "src/gpu/cl/operators/ClWinogradConv2d.cpp",
+ "src/gpu/cl/kernels/ClWeightsReshapeKernel.cpp",
+ "src/runtime/CL/functions/CLConvolutionLayer.cpp",
+ "src/runtime/CL/functions/CLDirectConvolutionLayer.cpp",
+ "src/runtime/CL/functions/CLFFTConvolutionLayer.cpp",
+ "src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp",
+ "src/runtime/CL/functions/CLWinogradConvolutionLayer.cpp"
+ ]
+ }
+ },
+ "Copy": {
+ "files": {
+ "common": [
+ "src/gpu/cl/kernels/ClCopyKernel.cpp",
+ "src/gpu/cl/operators/ClCopy.cpp",
+ "src/runtime/CL/functions/CLCopy.cpp"
+ ]
+ }
+ },
+ "CropResize": {
+ "deps": [ "Copy", "Fill", "Scale" ],
+ "files": {
+ "common": [
+ "src/gpu/cl/kernels/ClCropKernel.cpp",
+ "src/gpu/cl/operators/ClCrop.cpp",
+ "src/runtime/CL/functions/CLCrop.cpp",
+ "src/runtime/CL/functions/CLCropResize.cpp"
+ ]
+ }
+ },
+ "Deconv2d": {
+          "deps": [ "Conv2d", "Reverse", "Transpose" ],
+ "files": {
+ "common": [
+ "src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp",
+ "src/core/CL/kernels/CLDeconvolutionReshapeOutputKernel.cpp",
+ "src/runtime/CL/functions/CLDeconvolutionLayer.cpp",
+ "src/runtime/CL/functions/CLDeconvolutionLayerUpsample.cpp",
+ "src/runtime/CL/functions/CLDirectDeconvolutionLayer.cpp",
+ "src/runtime/CL/functions/CLGEMMDeconvolutionLayer.cpp"
+ ]
+ }
+ },
+ "DepthConvert": {
+          "deps": [ "Cast" ],
+ "files": {
+ "common": [ "src/runtime/CL/functions/CLDepthConvertLayer.cpp" ]
+ }
+ },
+ "DepthToSpace": {
+ "files": {
+ "common": [
+ "src/core/CL/kernels/CLDepthToSpaceLayerKernel.cpp",
+ "src/runtime/CL/functions/CLDepthToSpaceLayer.cpp"
+ ]
+ }
+ },
+ "DepthwiseConv2d": {
+ "deps": [ "Permute" ],
+ "files": {
+ "common": [
+ "src/core/CL/kernels/CLDepthwiseConvolutionLayerNativeKernel.cpp",
+ "src/runtime/CL/functions/CLDepthwiseConvolutionLayer.cpp"
+ ]
+ }
+ },
+ "Dequantize": {
+ "files": {
+ "common": [
+ "src/gpu/cl/kernels/ClDequantizeKernel.cpp",
+ "src/gpu/cl/operators/ClDequantize.cpp",
+ "src/runtime/CL/functions/CLDequantizationLayer.cpp"
+ ]
+ }
+ },
+ "ElementwiseBinary": {
+          "deps": [ "Add", "Sub" ],
+ "files": {
+ "common": [
+ "src/gpu/cl/kernels/ClElementwiseKernel.cpp",
+ "src/gpu/cl/operators/ClElementwiseOperations.cpp",
+ "src/runtime/CL/functions/CLElementwiseOperations.cpp"
+ ]
+ }
+ },
+      "ElementwiseUnary": {
+ "files": {
+ "common": [
+ "src/gpu/cl/kernels/ClElementwiseUnaryKernel.cpp",
+ "src/gpu/cl/operators/ClElementwiseUnary.cpp",
+ "src/runtime/CL/functions/CLElementwiseUnaryLayer.cpp"
+ ]
+ }
+ },
+ "FFT1D": {
+ "files": {
+ "common": [
+ "src/core/CL/kernels/CLFFTDigitReverseKernel.cpp",
+ "src/core/CL/kernels/CLFFTRadixStageKernel.cpp",
+ "src/core/CL/kernels/CLFFTScaleKernel.cpp",
+ "src/runtime/CL/functions/CLFFT1D.cpp"
+ ]
+ }
+ },
+ "FFT2D": {
+ "deps": [ "FFT1D" ],
+ "files": {
+ "common": [ "src/runtime/CL/functions/CLFFT2D.cpp" ]
+ }
+ },
+ "Fill": {
+ "files": {
+ "common": [
+ "src/gpu/cl/kernels/ClFillKernel.cpp",
+ "src/gpu/cl/operators/ClFill.cpp",
+ "src/runtime/CL/functions/CLFill.cpp"
+ ]
+ }
+ },
+ "Flatten": {
+ "files": {
+ "common": [
+ "src/gpu/cl/operators/ClFlatten.cpp",
+ "src/runtime/CL/functions/CLFlattenLayer.cpp"
+ ]
+ }
+ },
+ "Floor": {
+ "files": {
+ "common": [
+ "src/gpu/cl/kernels/ClFloorKernel.cpp",
+ "src/gpu/cl/operators/ClFloor.cpp",
+ "src/runtime/CL/functions/CLFloor.cpp"
+ ]
+ }
+ },
+ "FullyConnected": {
+          "deps": [ "Flatten", "Gemm", "Transpose" ],
+ "files": {
+ "common": [
+ "src/gpu/cl/kernels/ClConvertFullyConnectedWeightsKernel.cpp",
+ "src/gpu/cl/operators/ClConvertFullyConnectedWeights.cpp",
+ "src/gpu/cl/operators/ClFullyConnected.cpp",
+ "src/runtime/CL/functions/CLConvertFullyConnectedWeights.cpp",
+ "src/runtime/CL/functions/CLFullyConnectedLayer.cpp"
+ ]
+ }
+ },
+ "Gather": {
+ "files": {
+ "common": [
+ "src/core/CL/kernels/CLGatherKernel.cpp",
+                  "src/runtime/CL/functions/CLGather.cpp"
+              ]
+          }
+ },
+ "Gemm": {
+ "deps": [ "Cast" ],
+ "files": {
+ "common": [
+ "src/gpu/cl/kernels/gemm/ClGemmHelpers.cpp",
+ "src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeBifrost.cpp",
+ "src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeMidgard.cpp",
+ "src/gpu/cl/kernels/gemm/native/ClGemmDefaultConfigNativeValhall.cpp",
+ "src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedBifrost.cpp",
+ "src/gpu/cl/kernels/gemm/reshaped/ClGemmDefaultConfigReshapedValhall.cpp",
+ "src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyBifrost.cpp",
+ "src/gpu/cl/kernels/gemm/reshaped_only_rhs/ClGemmDefaultConfigReshapedRhsOnlyValhall.cpp",
+ "src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyNativeKernel.cpp",
+ "src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedKernel.cpp",
+ "src/gpu/cl/kernels/ClGemmLowpMatrixMultiplyReshapedOnlyRhsKernel.cpp",
+ "src/gpu/cl/kernels/ClGemmLowpOffsetContributionKernel.cpp",
+ "src/gpu/cl/kernels/ClGemmLowpOffsetContributionOutputStageKernel.cpp",
+ "src/gpu/cl/kernels/ClGemmLowpQuantizeDownInt32ScaleByFixedPointKernel.cpp",
+ "src/gpu/cl/kernels/ClGemmLowpQuantizeDownInt32ScaleByFloatKernel.cpp",
+ "src/gpu/cl/kernels/ClGemmLowpQuantizeDownInt32ScaleKernel.cpp",
+ "src/gpu/cl/kernels/ClGemmMatrixMultiplyKernel.cpp",
+ "src/gpu/cl/kernels/ClGemmMatrixMultiplyNativeKernel.cpp",
+ "src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedKernel.cpp",
+ "src/gpu/cl/kernels/ClGemmMatrixMultiplyReshapedOnlyRhsKernel.cpp",
+ "src/gpu/cl/kernels/ClGemmReshapeLhsMatrixKernel.cpp",
+ "src/gpu/cl/kernels/ClGemmReshapeRhsMatrixKernel.cpp",
+ "src/gpu/cl/kernels/ClGemmLowpReductionKernel.cpp",
+ "src/gpu/cl/operators/ClGemm.cpp",
+ "src/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.cpp",
+ "src/gpu/cl/operators/ClGemmLowpOutputStage.cpp",
+ "src/runtime/CL/gemm/CLGEMMDefaultTypeBifrost.cpp",
+ "src/runtime/CL/gemm/CLGEMMDefaultTypeMidgard.cpp",
+ "src/runtime/CL/gemm/CLGEMMDefaultTypeValhall.cpp",
+ "src/runtime/CL/gemm_auto_heuristics/CLGEMMAutoHeuristics.cpp",
+ "src/runtime/CL/functions/CLGEMM.cpp",
+ "src/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.cpp",
+ "src/runtime/CL/functions/CLGEMMLowpOutputStage.cpp"
+ ]
+ }
+ },
+ "GenerateProposals": {
+ "deps": [ "BoundingBoxTransform", "Dequantize", "Pad", "Permute", "Quantize", "Reshape" ],
+ "files": {
+ "common": [
+ "src/core/CL/kernels/CLGenerateProposalsLayerKernel.cpp",
+ "src/runtime/CL/functions/CLGenerateProposalsLayer.cpp"
+ ]
+ }
+ },
+ "InstanceNormalize": {
+ "files": {
+ "common": [
+ "src/core/CL/kernels/CLInstanceNormalizationLayerKernel.cpp",
+ "src/runtime/CL/functions/CLInstanceNormalizationLayer.cpp"
+ ]
+ }
+ },
+ "L2Normalize": {
+ "deps": [ "Reduction" ],
+ "files": {
+ "common": [
+ "src/core/CL/kernels/CLL2NormalizeLayerKernel.cpp",
+ "src/runtime/CL/functions/CLL2NormalizeLayer.cpp"
+ ]
+ }
+ },
+ "Logical": {
+ "files": {
+ "common": [
+ "src/gpu/cl/operators/ClLogicalNot.cpp",
+ "src/runtime/CL/functions/CLLogicalAnd.cpp",
+ "src/runtime/CL/functions/CLLogicalNot.cpp",
+ "src/runtime/CL/functions/CLLogicalOr.cpp"
+ ]
+ }
+ },
+ "LSTM": {
+ "deps": [
+ "Activation",
+ "Concatenate",
+ "Copy",
+ "Dequantize",
+ "ElementwiseBinary",
+ "Fill",
+ "FullyConnected",
+ "Gemm",
+ "MeanStdDevNormalize",
+ "Mul",
+ "Quantize",
+ "Slice",
+ "Transpose"
+ ],
+ "files": {
+ "common": [
+ "src/core/CL/kernels/CLQLSTMLayerNormalizationKernel.cpp",
+ "src/runtime/CL/functions/CLQLSTMLayer.cpp",
+ "src/runtime/CL/functions/CLLSTMLayer.cpp",
+ "src/runtime/CL/functions/CLLSTMLayerQuantized.cpp"
+ ]
+ }
+ },
+ "MaxUnpool2d": {
+ "deps": [ "Fill" ],
+ "files": {
+ "common": [
+ "src/core/CL/kernels/CLMaxUnpoolingLayerKernel.cpp",
+ "src/runtime/CL/functions/CLMaxUnpoolingLayer.cpp"
+ ]
+ }
+ },
+ "MeanStdDevNormalize": {
+ "deps": [ "Reduction" ],
+ "files": {
+ "common": [
+ "src/core/CL/kernels/CLMeanStdDevNormalizationKernel.cpp",
+ "src/runtime/CL/functions/CLMeanStdDevNormalizationLayer.cpp",
+ "src/runtime/CL/functions/CLReduceMean.cpp"
+ ]
+ }
+ },
+ "Mul": {
+ "files": {
+ "common": [
+ "src/gpu/cl/kernels/ClMulKernel.cpp",
+ "src/gpu/cl/operators/ClMul.cpp",
+ "src/runtime/CL/functions/CLPixelWiseMultiplication.cpp"
+ ]
+ }
+ },
+ "Normalize": {
+ "files": {
+ "common": [
+ "src/core/CL/kernels/CLNormalizationLayerKernel.cpp",
+ "src/runtime/CL/functions/CLNormalizationLayer.cpp"
+ ]
+ }
+ },
+ "Pad": {
+ "deps": [ "Copy" ],
+ "files": {
+ "common": [
+ "src/core/CL/kernels/CLPadLayerKernel.cpp",
+ "src/runtime/CL/functions/CLPadLayer.cpp"
+ ]
+ }
+ },
+ "Permute": {
+ "files": {
+ "common": [
+ "src/gpu/cl/kernels/ClPermuteKernel.cpp",
+ "src/gpu/cl/operators/ClPermute.cpp",
+ "src/runtime/CL/functions/CLPermute.cpp"
+ ]
+ }
+ },
+ "Pool2d": {
+ "files": {
+ "common": [
+ "src/gpu/cl/kernels/ClPool2dKernel.cpp",
+ "src/gpu/cl/operators/ClPool2d.cpp",
+ "src/runtime/CL/functions/CLPoolingLayer.cpp"
+ ]
+ }
+ },
+ "PRelu": {
+ "deps": [ "ElementwiseBinary" ],
+ "files": {
+ "common": [
+ "src/gpu/cl/operators/ClPRelu.cpp",
+ "src/runtime/CL/functions/CLPReluLayer.cpp"
+ ]
+ }
+ },
+ "PriorBox": {
+ "files": {
+ "common": [
+ "src/core/CL/kernels/CLPriorBoxLayerKernel.cpp",
+ "src/runtime/CL/functions/CLPriorBoxLayer.cpp"
+ ]
+ }
+ },
+ "Quantize": {
+ "files": {
+ "common": [
+ "src/gpu/cl/kernels/ClQuantizeKernel.cpp",
+ "src/gpu/cl/operators/ClQuantize.cpp",
+ "src/runtime/CL/functions/CLQuantizationLayer.cpp"
+ ]
+ }
+ },
+ "Range": {
+ "files": {
+ "common": [
+ "src/core/CL/kernels/CLRangeKernel.cpp",
+ "src/runtime/CL/functions/CLRange.cpp"
+ ]
+ }
+ },
+ "Reduction": {
+ "deps": [ "Reshape" ],
+ "files": {
+ "common": [
+ "src/core/CL/kernels/CLReductionOperationKernel.cpp",
+ "src/runtime/CL/functions/CLReductionOperation.cpp"
+ ]
+ }
+ },
+ "Remap": {
+ "files": {
+ "common": [
+ "src/core/CL/kernels/CLRemapKernel.cpp",
+                  "src/runtime/CL/functions/CLRemap.cpp"
+              ]
+          }
+ },
+ "Reorg": {
+ "files": {
+ "common": [
+ "src/core/CL/kernels/CLReorgLayerKernel.cpp",
+ "src/runtime/CL/functions/CLReorgLayer.cpp"
+ ]
+ }
+ },
+ "Reshape": {
+ "files": {
+ "common": [
+ "src/gpu/cl/kernels/ClReshapeKernel.cpp",
+ "src/gpu/cl/operators/ClReshape.cpp",
+ "src/runtime/CL/functions/CLReshapeLayer.cpp"
+ ]
+ }
+ },
+ "Reverse": {
+ "files": {
+ "common": [
+ "src/core/CL/kernels/CLReverseKernel.cpp",
+ "src/runtime/CL/functions/CLReverse.cpp"
+ ]
+ }
+ },
+ "RNN": {
+          "deps": [ "Activation", "Cast", "ElementwiseBinary", "FullyConnected", "Gemm" ],
+ "files": {
+ "common": [ "src/runtime/CL/functions/CLRNNLayer.cpp" ]
+ }
+ },
+ "ROIAlign": {
+ "files": {
+ "common": [
+ "src/core/CL/kernels/CLROIAlignLayerKernel.cpp",
+ "src/runtime/CL/functions/CLROIAlignLayer.cpp"
+ ]
+ }
+ },
+ "ROIPool2d": {
+ "files": {
+ "common": [
+ "src/core/CL/kernels/CLROIPoolingLayerKernel.cpp",
+ "src/runtime/CL/functions/CLROIPoolingLayer.cpp"
+ ]
+ }
+ },
+ "Scale": {
+ "files": {
+ "common": [
+ "src/gpu/cl/kernels/ClScaleKernel.cpp",
+ "src/gpu/cl/operators/ClScale.cpp",
+ "src/runtime/CL/functions/CLScale.cpp"
+ ]
+ }
+ },
+ "Select": {
+ "files": {
+ "common": [
+ "src/core/CL/kernels/CLSelectKernel.cpp",
+ "src/runtime/CL/functions/CLSelect.cpp"
+ ]
+ }
+ },
+ "Slice": {
+ "deps": [ "StridedSlice" ],
+ "files": {
+ "common": [ "src/runtime/CL/functions/CLSlice.cpp" ]
+ }
+ },
+ "Softmax": {
+ "deps": [ "Permute" ],
+ "files": {
+ "common": [
+ "src/gpu/cl/kernels/ClSoftmaxKernel.cpp",
+ "src/gpu/cl/operators/ClSoftmax.cpp",
+ "src/runtime/CL/functions/CLSoftmaxLayer.cpp"
+ ]
+ }
+ },
+ "SpaceToBatch": {
+ "files": {
+ "common": [
+ "src/core/CL/kernels/CLSpaceToBatchLayerKernel.cpp",
+ "src/runtime/CL/functions/CLSpaceToBatchLayer.cpp"
+ ]
+ }
+ },
+ "SpaceToDepth": {
+ "files": {
+ "common": [
+ "src/core/CL/kernels/CLSpaceToDepthLayerKernel.cpp",
+ "src/runtime/CL/functions/CLSpaceToDepthLayer.cpp"
+ ]
+ }
+ },
+ "Split": {
+ "deps": [ "StridedSlice" ],
+ "files": {
+ "common": [ "src/runtime/CL/functions/CLSplit.cpp" ]
+ }
+ },
+ "Stack": {
+ "files": {
+ "common": [
+ "src/core/CL/kernels/CLStackLayerKernel.cpp",
+ "src/runtime/CL/functions/CLStackLayer.cpp"
+ ]
+ }
+ },
+ "StridedSlice": {
+ "files": {
+ "common": [
+ "src/core/CL/kernels/CLStridedSliceKernel.cpp",
+ "src/runtime/CL/functions/CLStridedSlice.cpp"
+ ]
+ }
+ },
+ "Sub": {
+ "files": {
+ "common": [
+ "src/gpu/cl/kernels/ClElementwiseKernel.cpp",
+ "src/gpu/cl/operators/ClSub.cpp"
+ ]
+ }
+ },
+ "Tile": {
+ "files": {
+ "common": [
+ "src/core/CL/kernels/CLTileKernel.cpp",
+ "src/runtime/CL/functions/CLTile.cpp"
+ ]
+ }
+ },
+ "Transpose": {
+ "files": {
+ "common": [
+ "src/gpu/cl/kernels/ClTransposeKernel.cpp",
+ "src/gpu/cl/operators/ClTranspose.cpp",
+ "src/runtime/CL/functions/CLTranspose.cpp"
+ ]
+ }
+ },
+ "Unstack": {
+ "deps": [ "StridedSlice" ],
+ "files": {
+ "common": [ "src/runtime/CL/functions/CLUnstack.cpp" ]
+ }
+ },
+ "YUVNormalize": {
+ "files": {
+ "common": [
+ "src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.cpp",
+ "src/runtime/CL/functions/CLNormalizePlanarYUVLayer.cpp"
+ ]
}
}
- },
+ }
+},
"cpu": {
"common": [
"src/cpu/CpuContext.cpp",
"src/cpu/CpuQueue.cpp",
- "src/cpu/CpuTensor.cpp"
- ],
- "high_priority": [
- "Activation",
- "DepthwiseConv2d",
- "DirectConv2d",
- "Permute",
- "Pool2d",
- "Reshape",
- "FillBorder"
+ "src/cpu/CpuTensor.cpp",
+ "src/core/NEON/kernels/NEFillBorderKernel.cpp",
+ "src/runtime/NEON/INEOperator.cpp",
+ "src/runtime/NEON/INESimpleFunction.cpp",
+ "src/runtime/NEON/INESimpleFunctionNoBorder.cpp"
],
"operators": {
"Activation": {
"files": {
- "operator": [
- "src/cpu/operators/CpuActivation.cpp"
+ "common": [
+ "src/cpu/operators/CpuActivation.cpp",
+ "src/cpu/kernels/CpuActivationKernel.cpp",
+ "src/runtime/NEON/functions/NEActivationLayer.cpp"
],
- "kernel": [
- "src/cpu/kernels/CpuActivationKernel.cpp"
- ],
- "sve": {
- "fp32": [
- "src/cpu/kernels/activation/sve/fp32.cpp"
- ],
- "fp16": [
- "src/cpu/kernels/activation/sve/fp16.cpp"
- ],
- "qsymm16": [
- "src/cpu/kernels/activation/sve/qsymm16.cpp"
- ],
- "qasymm8": [
- "src/cpu/kernels/activation/sve/qasymm8.cpp"
- ],
- "qasymm8_signed": [
- "src/cpu/kernels/activation/sve/qasymm8_signed.cpp"
- ]
- },
"neon": {
- "fp32": [
- "src/cpu/kernels/activation/neon/fp32.cpp"
- ],
- "fp16": [
- "src/cpu/kernels/activation/neon/fp16.cpp"
- ],
- "qsymm16": [
- "src/cpu/kernels/activation/neon/qsymm16.cpp"
- ],
- "qasymm8": [
- "src/cpu/kernels/activation/neon/qasymm8.cpp"
- ],
- "qasymm8_signed": [
- "src/cpu/kernels/activation/neon/qasymm8_signed.cpp"
- ]
+ "fp16": [ "src/cpu/kernels/activation/neon/fp16.cpp" ],
+ "fp32": [ "src/cpu/kernels/activation/neon/fp32.cpp" ],
+ "qasymm8": [ "src/cpu/kernels/activation/neon/qasymm8.cpp" ],
+ "qasymm8_signed": [ "src/cpu/kernels/activation/neon/qasymm8_signed.cpp" ],
+ "qsymm16": [ "src/cpu/kernels/activation/neon/qsymm16.cpp" ]
+ },
+ "sve": {
+ "fp16": [ "src/cpu/kernels/activation/sve/fp16.cpp" ],
+ "fp32": [ "src/cpu/kernels/activation/sve/fp32.cpp" ],
+ "qasymm8": [ "src/cpu/kernels/activation/neon/qasymm8.cpp", "src/cpu/kernels/activation/sve/qasymm8.cpp" ],
+ "qasymm8_signed": [ "src/cpu/kernels/activation/neon/qasymm8_signed.cpp", "src/cpu/kernels/activation/sve/qasymm8_signed.cpp" ],
+ "qsymm16": [ "src/cpu/kernels/activation/neon/qsymm16.cpp", "src/cpu/kernels/activation/sve/qsymm16.cpp" ]
}
}
},
+ "ArgMinMax": {
+ "deps": [ "Reduction" ],
+ "files": {
+ "common": [ "src/runtime/NEON/functions/NEArgMinMaxLayer.cpp" ]
+ }
+ },
"Add": {
"files": {
- "operator": [
- "src/cpu/operators/CpuAdd.cpp"
+ "common": [
+ "src/cpu/operators/CpuAdd.cpp",
+ "src/cpu/kernels/CpuAddKernel.cpp",
+ "src/runtime/NEON/functions/NEArithmeticAddition.cpp"
],
- "kernel": [
- "src/cpu/kernels/CpuAddKernel.cpp"
- ],
- "sve": {
- "all": [
- "src/cpu/kernels/add/sve/impl.cpp"
- ],
- "qsymm16": [
- "src/cpu/kernels/add/sve/qsymm16.cpp"
- ],
- "qasymm8": [
- "src/cpu/kernels/add/sve/qasymm8.cpp"
- ],
- "qasymm8_signed": [
- "src/cpu/kernels/add/sve/qasymm8_signed.cpp"
- ]
- },
"neon": {
- "qsymm16": [
- "src/cpu/kernels/add/neon/qsymm16.cpp"
- ],
- "qasymm8": [
- "src/cpu/kernels/add/neon/qasymm8.cpp"
- ],
- "qasymm8_signed": [
- "src/cpu/kernels/add/neon/qasymm8_signed.cpp"
- ]
+ "qasymm8": [ "src/cpu/kernels/add/neon/qasymm8.cpp" ],
+ "qasymm8_signed": [ "src/cpu/kernels/add/neon/qasymm8_signed.cpp" ],
+ "qsymm16": [ "src/cpu/kernels/add/neon/qsymm16.cpp" ]
+ },
+ "sve": {
+ "common": [ "src/cpu/kernels/add/sve/impl.cpp" ],
+ "qasymm8": [ "src/cpu/kernels/add/neon/qasymm8.cpp", "src/cpu/kernels/add/sve/qasymm8.cpp" ],
+ "qasymm8_signed": [ "src/cpu/kernels/add/neon/qasymm8_signed.cpp", "src/cpu/kernels/add/sve/qasymm8_signed.cpp" ],
+ "qsymm16": [ "src/cpu/kernels/add/neon/qsymm16.cpp", "src/cpu/kernels/add/sve/qsymm16.cpp" ]
}
}
},
- "BatchNorm": {
+ "BatchNormalize": {
"files": {
- "kernel": [
- "src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp"
+ "common": [
+ "src/core/NEON/kernels/NEBatchNormalizationLayerKernel.cpp",
+ "src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp",
+ "src/runtime/NEON/functions/NEBatchNormalizationLayer.cpp",
+ "src/runtime/NEON/functions/NEFuseBatchNormalization.cpp"
],
- "sve": {
- "fp32": [
- "src/core/NEON/kernels/batchnormalization/impl/SVE/fp32.cpp"
- ],
- "fp16": [
- "src/core/NEON/kernels/batchnormalization/impl/SVE/fp16.cpp"
- ]
- },
"neon": {
- "fp32": [
- "src/core/NEON/kernels/batchnormalization/impl/NEON/fp32.cpp"
- ],
- "fp16": [
- "src/core/NEON/kernels/batchnormalization/impl/NEON/fp16.cpp"
- ]
+ "fp16": [ "src/core/NEON/kernels/batchnormalization/impl/NEON/fp16.cpp" ],
+ "fp32": [ "src/core/NEON/kernels/batchnormalization/impl/NEON/fp32.cpp" ]
+ },
+ "sve": {
+ "fp16": [ "src/core/NEON/kernels/batchnormalization/impl/SVE/fp16.cpp" ],
+ "fp32": [ "src/core/NEON/kernels/batchnormalization/impl/SVE/fp32.cpp" ]
}
}
},
"BatchToSpace": {
"files": {
- "kernel": [
- "src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp"
+ "common": [
+ "src/core/NEON/kernels/NEBatchToSpaceLayerKernel.cpp",
+ "src/runtime/NEON/functions/NEBatchToSpaceLayer.cpp"
]
}
},
"BitwiseAnd": {
"files": {
- "kernel": [
- "src/core/NEON/kernels/NEBitwiseAndKernel.cpp"
+ "common": [
+ "src/core/NEON/kernels/NEBitwiseAndKernel.cpp",
+ "src/runtime/NEON/functions/NEBitwiseAnd.cpp"
]
}
},
"BitwiseNot": {
"files": {
- "kernel": [
- "src/core/NEON/kernels/NEBitwiseNotKernel.cpp"
+ "common": [
+ "src/core/NEON/kernels/NEBitwiseNotKernel.cpp",
+ "src/runtime/NEON/functions/NEBitwiseNot.cpp"
]
}
},
"BitwiseOr": {
"files": {
- "kernel": [
- "src/core/NEON/kernels/NEBitwiseOrKernel.cpp"
+ "common": [
+ "src/core/NEON/kernels/NEBitwiseOrKernel.cpp",
+ "src/runtime/NEON/functions/NEBitwiseOr.cpp"
]
}
},
"BitwiseXor": {
"files": {
- "kernel": [
- "src/core/NEON/kernels/NEBitwiseXorKernel.cpp"
+ "common": [
+ "src/core/NEON/kernels/NEBitwiseXorKernel.cpp",
+ "src/runtime/NEON/functions/NEBitwiseXor.cpp"
]
}
},
"BoundingBoxTransform": {
"files": {
- "kernel": [
- "src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp"
- ]
- }
- },
- "ChannelShuffleLayer": {
- "files": {
- "kernel": [
- "src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp"
+ "common": [
+ "src/core/NEON/kernels/NEBoundingBoxTransformKernel.cpp",
+ "src/runtime/NEON/functions/NEBoundingBoxTransform.cpp"
]
}
},
- "Col2Im": {
+ "Cast": {
"files": {
- "kernel": [
- "src/cpu/kernels/CpuCol2ImKernel.cpp"
+ "common": [
+ "src/cpu/operators/CpuCast.cpp",
+ "src/cpu/kernels/CpuCastKernel.cpp",
+ "src/runtime/NEON/functions/NECast.cpp"
]
}
},
- "Cast": {
+ "ChannelShuffle": {
"files": {
- "operator": [
- "src/cpu/operators/CpuCast.cpp"
- ],
- "kernel": [
- "src/cpu/kernels/CpuCastKernel.cpp"
+ "common": [
+ "src/core/NEON/kernels/NEChannelShuffleLayerKernel.cpp",
+ "src/runtime/NEON/functions/NEChannelShuffleLayer.cpp"
]
}
},
"Concatenate": {
"files": {
- "operator": [
- "src/cpu/operators/CpuConcatenate.cpp"
- ],
- "kernel": [
+ "common": [
+ "src/cpu/operators/CpuConcatenate.cpp",
"src/cpu/kernels/CpuConcatenateWidthKernel.cpp",
"src/cpu/kernels/CpuConcatenateBatchKernel.cpp",
"src/cpu/kernels/CpuConcatenateDepthKernel.cpp",
- "src/cpu/kernels/CpuConcatenateHeightKernel.cpp"
+ "src/cpu/kernels/CpuConcatenateHeightKernel.cpp",
+ "src/runtime/NEON/functions/NEConcatenateLayer.cpp"
]
}
},
- "ConvertFullyConnectedWeights": {
+ "Conv2d": {
+ "deps": [
+ "Activation",
+ "ElementwiseBinary",
+ "FFT2D",
+ "Gemm",
+ "Mul",
+ "Pad",
+ "Permute",
+ "Reshape",
+ "Reverse",
+ "Slice"
+ ],
"files": {
- "operator": [
- "src/cpu/operators/CpuConvertFullyConnectedWeights.cpp"
- ],
- "kernel": [
- "src/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.cpp"
+ "common": [
+ "src/cpu/operators/CpuConv2d.cpp",
+ "src/cpu/operators/CpuDirectConv2d.cpp",
+ "src/cpu/operators/CpuGemmDirectConv2d.cpp",
+ "src/cpu/operators/CpuGemmConv2d.cpp",
+ "src/cpu/operators/CpuWinogradConv2d.cpp",
+ "src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp",
+ "src/cpu/kernels/CpuDirectConv2dKernel.cpp",
+ "src/cpu/kernels/CpuDirectConv2dOutputStageKernel.cpp",
+ "src/cpu/kernels/CpuWinogradConv2dKernel.cpp",
+ "src/cpu/kernels/CpuCol2ImKernel.cpp",
+ "src/cpu/kernels/CpuIm2ColKernel.cpp",
+ "src/cpu/kernels/CpuWeightsReshapeKernel.cpp",
+ "src/core/NEON/kernels/convolution/common/padding.cpp",
+ "src/core/NEON/kernels/convolution/common/qasymm8.cpp",
+ "src/core/NEON/kernels/convolution/common/qsymm8.cpp",
+ "src/core/NEON/kernels/convolution/common/utils.cpp",
+ "src/core/NEON/kernels/convolution/winograd/padding.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_1x8_fp32_fp32_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_4x4_fp16_fp16_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_4x4_fp32_fp32_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_6x6_fp16_fp16_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_6x6_fp32_fp32_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_2_7_fp32_fp32_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_2x2_3x3_fp32_fp32_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_2x2_5x5_fp32_fp32_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_4_5_fp32_fp32_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_4x4_3x3_fp16_fp16_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_4x4_3x3_fp32_fp32_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_6_3_fp32_fp32_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_2_7_fp32_fp32_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_2x2_3x3_fp32_fp32_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_2x2_5x5_fp32_fp32_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_4_5_fp32_fp32_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_4x4_3x3_fp16_fp16_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_4x4_3x3_fp32_fp32_integers.cpp",
+ "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_6_3_fp32_fp32_integers.cpp",
+ "src/runtime/NEON/functions/NEConvolutionLayer.cpp",
+ "src/runtime/NEON/functions/NEDirectConvolutionLayer.cpp",
+ "src/runtime/NEON/functions/NEFFTConvolutionLayer.cpp",
+ "src/runtime/NEON/functions/NEGEMMConv2d.cpp",
+ "src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp",
+ "src/runtime/NEON/functions/NEWinogradConvolutionLayer.cpp"
]
}
},
- "ConvertQuantizedSignedness": {
+ "Copy": {
"files": {
- "kernel": [
- "src/cpu/kernels/CpuConvertQuantizedSignednessKernel.cpp"
+ "common": [
+ "src/cpu/operators/CpuCopy.cpp",
+ "src/cpu/kernels/CpuCopyKernel.cpp",
+ "src/runtime/NEON/functions/NECopy.cpp"
]
}
},
- "Convolution": {
+ "CropResize": {
+ "deps": [ "Scale" ],
"files": {
- "operator": [
- "src/cpu/operators/CpuConv2d.cpp"
+ "common": [
+ "src/core/NEON/kernels/NECropKernel.cpp",
+ "src/runtime/NEON/functions/NECropResize.cpp"
]
}
},
- "Copy": {
+ "Deconv2d": {
+ "deps": [ "Conv2d", "Reverse", "Transpose"],
"files": {
- "operator": [
- "src/cpu/operators/CpuCopy.cpp"
- ],
- "kernel": [
- "src/cpu/kernels/CpuCopyKernel.cpp"
+ "common": [
+ "src/runtime/NEON/functions/NEDeconvolutionLayer.cpp"
]
}
},
- "Crop": {
+ "DepthConvert": {
+ "deps": [ "Cast"],
"files": {
- "kernel": [
- "src/core/NEON/kernels/NECropKernel.cpp"
+ "common": [
+ "src/runtime/NEON/functions/NEDepthConvertLayer.cpp"
+ ]
+ }
+ },
+ "DepthToSpace": {
+ "files": {
+ "common": [
+ "src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp",
+ "src/runtime/NEON/functions/NEDepthToSpaceLayer.cpp"
]
}
},
"DepthwiseConv2d": {
- "deps": [
- "Activation",
- "Permute"
- ],
+ "deps": [ "Activation", "Permute" ],
"files": {
- "operator": [
+ "common": [
"src/cpu/operators/CpuDepthwiseConv2d.cpp",
"src/cpu/operators/CpuDepthwiseConv2dAssemblyDispatch.cpp",
- "src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.cpp"
- ],
- "kernel": [
+ "src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.cpp",
+ "src/cpu/kernels/internal/CpuDepthwiseConv2dAssemblyWrapperKernel.cpp",
"src/core/NEON/kernels/convolution/common/padding.cpp",
"src/core/NEON/kernels/convolution/common/qasymm8.cpp",
"src/core/NEON/kernels/convolution/common/qsymm8.cpp",
"src/core/NEON/kernels/convolution/common/utils.cpp",
- "src/cpu/kernels/CpuDepthwiseConv2dNativeKernel.cpp"
+ "src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp16.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8q.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8s8u8q.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/interleaves/8b_mla.cpp",
+ "src/runtime/NEON/functions/NEDepthwiseConvolutionLayer.cpp"
],
+ "neon": {
+ "estate64": [
+ "src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_s8q_3x3_dot.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_u8q_3x3_dot.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_generic_output9_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output3x3_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s1_output4x4_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_3x3_s2_output2x2_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_direct.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_5x5_s1_output2x2_mla_depthfirst/generic_indirect.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_nhwc_generic_output9_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_3x3_s2_with_multiplier_output3x3_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_5x5_s1_with_multiplier_output2x4_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp32_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_nhwc_generic_output9_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_s8qs_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s1_output2x2_dot_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_3x3_s2_with_multiplier_output2x4_dot_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_nhwc_generic_output9_mla_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_u8s8u8q_packed_to_nhwc_generic_with_multiplier_output2x8_mla_depthfirst/generic.cpp"
+ ]
+ },
"sve": {
- "all": [
+ "common": [
+ "src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_8b_mla.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_s8q_3x3_dot.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/interleaves/sve_u8q_3x3_dot.cpp",
@@ -994,17 +1200,7 @@
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8q_packed_to_nhwc_5x5_s1_with_multiplier_output4x2_dot_depthfirst/generic.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s1_output2x2_mla_depthfirst/generic.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_3x3_s2_output2x2_mla_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp"
- ]
- },
- "neon": {
- "estate64": [
- "src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp16.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/depthwise_fp32.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8q.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/depthwise_u8s8u8q.cpp",
- "src/core/NEON/kernels/arm_conv/depthwise/interleaves/8b_mla.cpp",
+ "src/core/NEON/kernels/arm_conv/depthwise/kernels/sve_u8s8u8q_nhwc_5x5_s1_output2x2_mla_depthfirst/generic.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_s8q_3x3_dot.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/interleaves/a64_u8q_3x3_dot.cpp",
"src/core/NEON/kernels/arm_conv/depthwise/kernels/a64_fp16_nhwc_3x3_s1_output2x2_mla_depthfirst/generic_direct.cpp",
@@ -1059,166 +1255,122 @@
}
}
},
- "DepthToSpaceLayer": {
- "files": {
- "kernel": [
- "src/core/NEON/kernels/NEDepthToSpaceLayerKernel.cpp"
- ]
- }
- },
"Dequantize": {
"files": {
- "operator": [
- "src/cpu/operators/CpuDequantize.cpp"
- ],
- "kernel": [
- "src/cpu/kernels/CpuDequantizeKernel.cpp"
+ "common": [
+ "src/cpu/operators/CpuDequantize.cpp",
+ "src/cpu/kernels/CpuDequantizeKernel.cpp",
+ "src/runtime/NEON/functions/NEDequantizationLayer.cpp"
]
}
},
- "DirectConv2d": {
- "deps": [
- "Activation",
- "FillBorder"
- ],
+ "DetectionPostProcess": {
+ "deps": [ "Dequantize" ],
"files": {
- "operator": [
- "src/cpu/operators/CpuDirectConv2d.cpp"
- ],
- "kernel": [
- "src/cpu/kernels/CpuDirectConv2dKernel.cpp",
- "src/cpu/kernels/CpuDirectConv2dOutputStageKernel.cpp"
- ]
+ "common" : [ "src/runtime/NEON/functions/NEDetectionPostProcessLayer.cpp" ]
}
},
- "Elementwise": {
+ "ElementwiseBinary": {
"files": {
- "operator": [
- "src/cpu/operators/CpuElementwise.cpp"
- ],
- "kernel": [
- "src/cpu/kernels/CpuElementwiseKernel.cpp"
+ "common": [
+ "src/cpu/operators/CpuElementwise.cpp",
+ "src/cpu/kernels/CpuElementwiseKernel.cpp",
+ "src/runtime/NEON/functions/NEElementwiseOperations.cpp"
],
"sve": {
- "all": [
- "src/cpu/kernels/elementwise/sve/elementwise.cpp"
- ]
+ "common": [ "src/cpu/kernels/elementwise/sve/elementwise.cpp" ]
}
}
},
- "ElementwiseUnary": {
+ "ElementwiseUnary":{
"files": {
- "operator": [
- "src/cpu/operators/CpuElementwiseUnary.cpp"
- ],
- "kernel": [
- "src/cpu/kernels/CpuElementwiseUnaryKernel.cpp"
+ "common": [
+ "src/cpu/operators/CpuElementwiseUnary.cpp",
+ "src/cpu/kernels/CpuElementwiseUnaryKernel.cpp",
+ "src/runtime/NEON/functions/NEElementwiseUnaryLayer.cpp"
],
"sve": {
- "all": [
- "src/cpu/kernels/elementwise/sve/elementwise_unary.cpp"
- ]
+ "common": [ "src/cpu/kernels/elementwise/sve/elementwise_unary.cpp" ]
}
}
},
"FFT1D": {
"files": {
- "kernel": [
+ "common": [
"src/core/NEON/kernels/NEFFTDigitReverseKernel.cpp",
"src/core/NEON/kernels/NEFFTRadixStageKernel.cpp",
- "src/core/NEON/kernels/NEFFTScaleKernel.cpp"
+ "src/core/NEON/kernels/NEFFTScaleKernel.cpp",
+ "src/runtime/NEON/functions/NEFFT1D.cpp"
]
}
},
- "FillBorder": {
+ "FFT2D": {
+ "deps": [ "FFT1D" ],
"files": {
- "kernel": [
- "src/core/NEON/kernels/NEFillBorderKernel.cpp"
+ "common": [
+ "src/runtime/NEON/functions/NEFFT2D.cpp"
]
}
},
- "Flatten": {
- "deps: ": [
- "Reshape"
- ],
+ "Fill": {
"files": {
- "operator": [
- "src/cpu/operators/CpuFlatten.cpp"
+ "common": [
+ "src/cpu/operators/CpuFill.cpp",
+ "src/cpu/kernels/CpuFillKernel.cpp",
+ "src/runtime/NEON/functions/NEFill.cpp"
]
}
},
- "Fill": {
+ "Flatten": {
+ "deps: ": [ "Reshape" ],
"files": {
- "operator": [
- "src/cpu/operators/CpuFill.cpp"
- ],
- "kernel": [
- "src/cpu/kernels/CpuFillKernel.cpp"
+ "common": [
+ "src/cpu/operators/CpuFlatten.cpp",
+ "src/runtime/NEON/functions/NEFlattenLayer.cpp"
]
}
},
"Floor": {
"files": {
- "operator": [
- "src/cpu/operators/CpuFloor.cpp"
- ],
- "kernel": [
- "src/cpu/kernels/CpuFloorKernel.cpp"
+ "common": [
+ "src/cpu/operators/CpuFloor.cpp",
+ "src/cpu/kernels/CpuFloorKernel.cpp",
+ "src/runtime/NEON/functions/NEFloor.cpp"
],
"neon": {
- "fp32": [
- "src/cpu/kernels/floor/neon/fp32.cpp"
- ],
- "fp16": [
- "src/cpu/kernels/floor/neon/fp16.cpp"
- ]
+ "fp32": [ "src/cpu/kernels/floor/neon/fp32.cpp" ],
+ "fp16": [ "src/cpu/kernels/floor/neon/fp16.cpp" ]
}
}
},
"FullyConnected": {
- "deps": [
- "CpuFlatten",
- "CpuConvertFullyConnectedWeights",
- "CpuGemm",
- "CpuGemmLowpMatrixMultiplyCore"
- ],
+ "deps": [ "Flatten", "Gemm", "Transpose"],
"files": {
- "operator": [
- "src/cpu/operators/CpuFullyConnected.cpp"
+ "common": [
+ "src/cpu/kernels/CpuConvertFullyConnectedWeightsKernel.cpp",
+ "src/cpu/operators/CpuConvertFullyConnectedWeights.cpp",
+ "src/cpu/operators/CpuFullyConnected.cpp",
+ "src/runtime/NEON/functions/NEConvertFullyConnectedWeights.cpp",
+ "src/runtime/NEON/functions/NEFullyConnectedLayer.cpp"
]
- },
- "kernel": [
- "CpuTransposeKernel"
- ]
+ }
},
- "FuseBatchNormalization": {
+ "Gather": {
"files": {
- "kernel": [
- "src/core/NEON/kernels/NEFuseBatchNormalizationKernel.cpp"
+ "common": [
+ "src/core/NEON/kernels/NEGatherKernel.cpp",
+ "src/runtime/NEON/functions/NEGather.cpp"
]
}
},
- "GEMM": {
+ "Gemm": {
"files": {
- "operator" : ["src/cpu/operators/CpuGemm.cpp"],
- "kernel": [
+ "common": [
+ "src/cpu/kernels/CpuConvertQuantizedSignednessKernel.cpp",
"src/cpu/kernels/CpuGemmMatrixAdditionKernel.cpp",
"src/cpu/kernels/CpuGemmMatrixMultiplyKernel.cpp",
"src/cpu/kernels/CpuGemmTranspose1xWKernel.cpp",
- "src/cpu/kernels/CpuGemmInterleave4x4Kernel.cpp"
- ]
- }
- },
- "GEMMLowp": {
- "deps": [
- "GemmAssemblyDispatch"
- ],
- "files": {
- "operator" : [
- "src/cpu/operators/CpuGemmLowpOutputStage.cpp",
- "src/cpu/operators/CpuGemmLowpMatrixMultiplyCore.cpp"
- ],
- "kernel": [
+ "src/cpu/kernels/CpuGemmInterleave4x4Kernel.cpp",
"src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ScaleKernel.cpp",
"src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt16ScaleByFixedPointKernel.cpp",
"src/cpu/kernels/CpuGemmLowpQuantizeDownInt32ToInt8ScaleByFixedPointKernel.cpp",
@@ -1226,36 +1378,12 @@
"src/cpu/kernels/CpuGemmLowpMatrixMultiplyKernel.cpp",
"src/cpu/kernels/CpuGemmLowpMatrixReductionKernel.cpp",
"src/cpu/kernels/CpuGemmLowpOffsetContributionOutputStageKernel.cpp",
- "src/cpu/kernels/CpuGemmLowpOffsetContributionKernel.cpp"
- ]
- }
- },
- "GEMMConvolution": {
- "deps": [
- "Activation",
- "Col2Im",
- "Reshape",
- "Im2Col",
- "GEMMLowpOffsetContributionOutputStage",
- "ConvertQuantizedSignedness"
- ],
- "files": {
- "operator": [
- "src/cpu/operators/CpuGemmConv2d.cpp"
- ],
- "kernel": [
- "src/cpu/kernels/CpuWeightsReshapeKernel.cpp"
- ]
- }
- },
- "GemmAssemblyDispatch": {
- "files": {
- "operator": [
- "src/cpu/operators/internal/CpuGemmAssemblyDispatch.cpp"
- ],
- "kernel": [
- "src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp",
+ "src/cpu/kernels/CpuGemmLowpOffsetContributionKernel.cpp",
+ "src/cpu/operators/CpuGemm.cpp",
+ "src/cpu/operators/CpuGemmLowpOutputStage.cpp",
+ "src/cpu/operators/CpuGemmLowpMatrixMultiplyCore.cpp",
"src/core/NEON/kernels/arm_gemm/gemm_bf16.cpp",
+ "src/core/NEON/kernels/arm_gemm/gemm_fp16.cpp",
"src/core/NEON/kernels/arm_gemm/gemm_fp32.cpp",
"src/core/NEON/kernels/arm_gemm/gemm_int16.cpp",
"src/core/NEON/kernels/arm_gemm/gemm_int8.cpp",
@@ -1263,14 +1391,17 @@
"src/core/NEON/kernels/arm_gemm/gemm_quint8.cpp",
"src/core/NEON/kernels/arm_gemm/gemm_uint16.cpp",
"src/core/NEON/kernels/arm_gemm/gemm_uint8.cpp",
+ "src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp",
"src/core/NEON/kernels/arm_gemm/mergeresults-fp16.cpp",
"src/core/NEON/kernels/arm_gemm/mergeresults.cpp",
- "src/core/NEON/kernels/arm_gemm/interleave_indirect.cpp",
"src/core/NEON/kernels/arm_gemm/misc.cpp",
"src/core/NEON/kernels/arm_gemm/quantized.cpp",
"src/core/NEON/kernels/arm_gemm/rowsum_indirect_s8.cpp",
"src/core/NEON/kernels/arm_gemm/rowsum_indirect_u8.cpp",
- "src/core/NEON/kernels/arm_gemm/transform.cpp"
+ "src/core/NEON/kernels/arm_gemm/transform.cpp",
+ "src/runtime/NEON/functions/NEGEMM.cpp",
+ "src/runtime/NEON/functions/NEGEMMLowpMatrixMultiplyCore.cpp",
+ "src/runtime/NEON/functions/NEGEMMLowpOutputStage.cpp"
],
"neon": {
"estate32": [
@@ -1344,7 +1475,7 @@
]
},
"sve": {
- "all": [
+ "common": [
"src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_dot_6x4VL/generic.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_bf16fp32_mmla_6x4VL/generic.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/sve_hybrid_fp16_mla_6x4VL/a64fx.cpp",
@@ -1384,152 +1515,196 @@
"src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_s8s32_dot_8x1VL/generic.cpp",
"src/core/NEON/kernels/arm_gemm/kernels/sve_smallK_hybrid_u8u32_dot_8x1VL/generic.cpp",
"src/core/NEON/kernels/arm_gemm/mergeresults-sve.cpp",
- "src/core/NEON/kernels/arm_gemm/transform-sve.cpp"
+ "src/core/NEON/kernels/arm_gemm/transform-sve.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s16_8x12/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_4x4/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/a55r1.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_s8_8x12/x1.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u16_8x12/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_4x4/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/a55r1.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_gemm_u8_8x12/x1.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/a55r1.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hgemm_8x24/x1.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_dot_6x16/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_bf16fp32_mmla_6x16/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp16_mla_6x32/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_4x24/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_6x16/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32_mla_8x4/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_4x24/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_fp32bf16fp32_mmla_6x16/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_dot_4x16/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qa_mmla_4x16/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_dot_6x16/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8qs_mmla_6x16/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_dot_6x16/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_s8s32_mmla_6x16/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_dot_4x16/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8qa_mmla_4x16/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_dot_6x16/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_hybrid_u8u32_mmla_6x16/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_dot_8x12/x1.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_bf16fp32_mmla_8x12/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_s8s32_mmla_8x12/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_interleaved_u8u32_mmla_8x12/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a53.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/a55r1.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x12/x1.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemm_8x6/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_sgemv_pretransposed/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_6x4/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_fp32_mla_8x4/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_6x4/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_6x4/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_8x4/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_s8s32_dot_8x4/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_6x4/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_6x4/generic.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/a55.cpp",
+ "src/core/NEON/kernels/arm_gemm/kernels/a64_smallK_hybrid_u8u32_dot_8x4/generic.cpp"
]
}
}
},
- "GemmDirectConv2d": {
- "deps": [
- "Activation",
- "GemmAssemblyDispatch",
- "Permute"
- ],
- "files": {
- "operator": [
- "src/cpu/operators/CpuGemmDirectConv2d.cpp"
- ]
- }
- },
- "Mul": {
- "files": {
- "operator": [
- "src/cpu/operators/CpuMul.cpp"
- ],
- "kernel": [
- "src/cpu/kernels/CpuMulKernel.cpp"
- ]
- }
- },
- "Quantize": {
- "files": {
- "operator": [
- "src/cpu/operators/CpuQuantize.cpp"
- ],
- "kernel": [
- "src/cpu/kernels/CpuQuantizeKernel.cpp"
- ]
- }
- },
- "Reshape": {
- "files": {
- "operator": [
- "src/cpu/operators/CpuReshape.cpp"
- ],
- "kernel": [
- "src/cpu/kernels/CpuReshapeKernel.cpp"
- ]
- }
- },
- "Gather": {
+ "GenerateProposals": {
+ "deps": [ "BoundingBoxTransform", "Dequantize", "Pad", "Permute", "Quantize", "Reshape" ],
"files": {
- "kernel": [
- "src/core/NEON/kernels/NEGatherKernel.cpp"
+ "common": [
+ "src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp",
+ "src/runtime/NEON/functions/NEGenerateProposalsLayer.cpp"
]
}
},
- "GenerateProposalsLayer": {
+ "InstanceNormalize": {
+ "deps": [ "Permute", "Reduction" ],
"files": {
- "kernel": [
- "src/core/NEON/kernels/NEGenerateProposalsLayerKernel.cpp"
+ "common": [
+ "src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp",
+ "src/runtime/NEON/functions/NEInstanceNormalizationLayer.cpp"
]
}
},
- "Im2Col": {
+ "L2Normalize": {
+ "deps": [ "Reduction" ],
"files": {
- "kernel": [
- "src/cpu/kernels/CpuIm2ColKernel.cpp"
+ "common": [
+ "src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp",
+ "src/runtime/NEON/functions/NEL2NormalizeLayer.cpp"
]
}
},
- "InstanceNormalization": {
+ "Logical": {
"files": {
- "kernel": [
- "src/core/NEON/kernels/NEInstanceNormalizationLayerKernel.cpp"
+ "common": [
+ "src/core/NEON/kernels/NELogicalKernel.cpp",
+ "src/runtime/NEON/functions/NELogical.cpp"
]
}
},
- "L2Normalize": {
+ "LSTM": {
"deps": [
- "Reduction"
+ "Activation",
+ "Concatenate",
+ "Copy",
+ "Dequantize",
+ "ElementwiseBinary",
+ "Fill",
+ "FullyConnected",
+ "Gemm",
+ "MeanStdDevNormalize",
+ "Mul",
+ "Quantize",
+ "Slice",
+ "Transpose"
],
"files": {
- "kernel": [
- "src/core/NEON/kernels/NEL2NormalizeLayerKernel.cpp"
+ "common": [
+ "src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.cpp",
+ "src/runtime/NEON/functions/NELSTMLayer.cpp",
+ "src/runtime/NEON/functions/NELSTMLayerQuantized.cpp",
+ "src/runtime/NEON/functions/NEQLSTMLayer.cpp"
]
}
},
- "Logical": {
+ "MaxUnpool2d": {
+ "deps": [ "Fill" ],
"files": {
- "kernel": [
- "src/core/NEON/kernels/NELogicalKernel.cpp"
+ "common": [
+ "src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp",
+ "src/runtime/NEON/functions/NEMaxUnpoolingLayer.cpp"
]
}
},
- "MaxUnpooling": {
+ "Mean": {
+ "deps" : [ "Reduction" ],
"files": {
- "kernel": [
- "src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.cpp"
- ]
+ "common": [ "src/runtime/NEON/functions/NEReduceMean.cpp" ]
}
},
- "MeanStdDevNormalization": {
+ "MeanStdDevNormalize": {
"files": {
- "kernel": [
- "src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp"
+ "common": [
+ "src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.cpp",
+ "src/runtime/NEON/functions/NEMeanStdDevNormalizationLayer.cpp"
]
}
},
- "MinMax": {
+ "Mul": {
"files": {
- "kernel": [
- "src/core/NEON/kernels/NEMinMaxLayerKernel.cpp"
+ "common": [
+ "src/cpu/operators/CpuMul.cpp",
+ "src/cpu/kernels/CpuMulKernel.cpp",
+ "src/runtime/NEON/functions/NEPixelWiseMultiplication.cpp"
]
}
},
- "Normalization": {
- "deps": [
- "PixelWiseMultiplication"
- ],
+ "Normalize": {
+ "deps": [ "Mul" ],
"files": {
- "kernel": [
- "src/core/NEON/kernels/NENormalizationLayerKernel.cpp"
+ "common": [
+ "src/core/NEON/kernels/NENormalizationLayerKernel.cpp",
+ "src/runtime/NEON/functions/NENormalizationLayer.cpp"
]
}
},
"Pad": {
+ "deps": [ "Concatenate", "Copy", "StridedSlice" ],
"files": {
- "kernel": [
- "src/core/NEON/kernels/NEPadLayerKernel.cpp"
+ "common": [
+ "src/core/NEON/kernels/NEPadLayerKernel.cpp",
+ "src/runtime/NEON/functions/NEPadLayer.cpp"
]
}
},
"Permute": {
"files": {
- "operator": [
- "src/cpu/operators/CpuPermute.cpp"
- ],
- "kernel": [
- "src/cpu/kernels/CpuPermuteKernel.cpp"
+ "common": [
+ "src/cpu/operators/CpuPermute.cpp",
+ "src/cpu/kernels/CpuPermuteKernel.cpp",
+ "src/runtime/NEON/functions/NEPermute.cpp"
]
}
},
"Pool2d": {
"files": {
- "operator": [
- "src/cpu/operators/CpuPool2d.cpp"
- ],
- "kernel": [
+ "common": [
+ "src/cpu/operators/CpuPool2d.cpp",
"src/cpu/kernels/CpuPool2dKernel.cpp",
"src/cpu/kernels/internal/CpuPool2dAssemblyWrapperKernel.cpp",
"src/core/NEON/kernels/arm_conv/pooling/kernels/cpp_nhwc_1x1_stride_any_depthfirst/generic.cpp",
@@ -1538,24 +1713,15 @@
"src/core/NEON/kernels/arm_conv/pooling/pooling_s8.cpp",
"src/core/NEON/kernels/arm_conv/pooling/pooling_s8q.cpp",
"src/core/NEON/kernels/arm_conv/pooling/pooling_u8.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/pooling_u8q.cpp"
+ "src/core/NEON/kernels/arm_conv/pooling/pooling_u8q.cpp",
+ "src/runtime/NEON/functions/NEPoolingLayer.cpp"
],
"neon": {
- "nchw": [
- "src/cpu/kernels/pool2d/neon/nchw/all.cpp"
- ],
- "fp32": [
- "src/cpu/kernels/pool2d/neon/fp32.cpp"
- ],
- "fp16": [
- "src/cpu/kernels/pool2d/neon/fp16.cpp"
- ],
- "qasymm8": [
- "src/cpu/kernels/pool2d/neon/qasymm8.cpp"
- ],
- "qasymm8_signed": [
- "src/cpu/kernels/pool2d/neon/qasymm8_signed.cpp"
- ],
+ "nchw": [ "src/cpu/kernels/pool2d/neon/nchw/all.cpp" ],
+ "fp16": [ "src/cpu/kernels/pool2d/neon/fp16.cpp" ],
+ "fp32": [ "src/cpu/kernels/pool2d/neon/fp32.cpp" ],
+ "qasymm8": [ "src/cpu/kernels/pool2d/neon/qasymm8.cpp" ],
+ "qasymm8_signed": [ "src/cpu/kernels/pool2d/neon/qasymm8_signed.cpp" ],
"estate64": [
"src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp",
"src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_generic_depthfirst/generic.cpp",
@@ -1578,15 +1744,17 @@
]
},
"sve": {
- "all": [
- "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_generic_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_generic_depthfirst/generic.cpp",
+ "qasymm8": [ "src/cpu/kernels/pool2d/neon/qasymm8.cpp" ],
+ "qasymm8_signed": [ "src/cpu/kernels/pool2d/neon/qasymm8_signed.cpp" ],
+ "common": [
"src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp",
"src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_avg_generic_depthfirst/generic.cpp",
"src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
"src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp16_nhwc_max_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_avg_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_fp32_nhwc_max_generic_depthfirst/generic.cpp",
"src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_avg_generic_depthfirst/generic.cpp",
"src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
"src/core/NEON/kernels/arm_conv/pooling/kernels/sve_s8_nhwc_max_generic_depthfirst/generic.cpp",
@@ -1596,239 +1764,258 @@
"src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
"src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8_nhwc_max_generic_depthfirst/generic.cpp",
"src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_avg_generic_depthfirst/generic.cpp",
- "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_max_generic_depthfirst/generic.cpp"
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/sve_u8q_nhwc_max_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_avg_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp16_nhwc_max_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_3x3_s1_output2x2_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_avg_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_fp32_nhwc_max_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_avg_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8_nhwc_max_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_avg_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_s8q_nhwc_max_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_avg_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_2x2_s1_output2x2_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8_nhwc_max_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_avg_generic_depthfirst/generic.cpp",
+ "src/core/NEON/kernels/arm_conv/pooling/kernels/a64_u8q_nhwc_max_generic_depthfirst/generic.cpp"
]
}
}
},
+ "PRelu": {
+ "deps": [ "ElementwiseBinary" ],
+ "files": {
+ "common": [
+ "src/runtime/NEON/functions/NEPReluLayer.cpp"
+ ]
+ }
+ },
"PriorBox": {
"files": {
- "kernel": [
- "src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp"
+ "common": [
+ "src/core/NEON/kernels/NEPriorBoxLayerKernel.cpp",
+ "src/runtime/NEON/functions/NEPriorBoxLayer.cpp"
]
}
},
- "QLSTMLayerNormalization": {
+ "Quantize": {
"files": {
- "kernel": [
- "src/core/NEON/kernels/NEQLSTMLayerNormalizationKernel.cpp"
+ "common": [
+ "src/cpu/operators/CpuQuantize.cpp",
+ "src/cpu/kernels/CpuQuantizeKernel.cpp",
+ "src/runtime/NEON/functions/NEQuantizationLayer.cpp"
]
}
},
"Range": {
"files": {
- "kernel": [
- "src/core/NEON/kernels/NERangeKernel.cpp"
+ "common": [
+ "src/core/NEON/kernels/NERangeKernel.cpp",
+ "src/runtime/NEON/functions/NERange.cpp"
]
}
},
- "ReductionOperation": {
+ "Reduction":{
+ "deps": [ "Reshape" ],
"files": {
- "kernel": [
- "src/core/NEON/kernels/NEReductionOperationKernel.cpp"
+ "common": [
+ "src/core/NEON/kernels/NEReductionOperationKernel.cpp",
+ "src/runtime/NEON/functions/NEReductionOperation.cpp"
]
}
},
"Remap": {
"files": {
- "kernel": [
- "src/core/NEON/kernels/NERemapKernel.cpp"
+ "common": [
+ "src/core/NEON/kernels/NERemapKernel.cpp",
+ "src/runtime/NEON/functions/NERemap.cpp"
]
}
},
"Reorg": {
"files": {
- "kernel": [
- "src/core/NEON/kernels/NEReorgLayerKernel.cpp"
+ "common": [
+ "src/core/NEON/kernels/NEReorgLayerKernel.cpp",
+ "src/runtime/NEON/functions/NEReorgLayer.cpp"
+ ]
+ }
+ },
+ "Reshape": {
+ "files": {
+ "common": [
+ "src/cpu/operators/CpuReshape.cpp",
+ "src/cpu/kernels/CpuReshapeKernel.cpp",
+ "src/runtime/NEON/functions/NEReshapeLayer.cpp"
]
}
},
"Reverse": {
"files": {
- "kernel": [
- "src/core/NEON/kernels/NEReverseKernel.cpp"
+ "common": [
+ "src/core/NEON/kernels/NEReverseKernel.cpp",
+ "src/runtime/NEON/functions/NEReverse.cpp"
]
}
},
+ "RNN": {
+ "deps": [ "Activation", "Add", "FullyConnected", "Gemm"],
+ "files": {
+ "common": [ "src/runtime/NEON/functions/NERNNLayer.cpp" ]
+ }
+ },
"ROIAlign": {
"files": {
- "kernel": [
- "src/core/NEON/kernels/NEROIAlignLayerKernel.cpp"
+ "common": [
+ "src/core/NEON/kernels/NEROIAlignLayerKernel.cpp",
+ "src/runtime/NEON/functions/NEROIAlignLayer.cpp"
]
}
},
- "ROIPooling": {
+ "ROIPool2d": {
"files": {
- "kernel": [
- "src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp"
+ "common": [
+ "src/core/NEON/kernels/NEROIPoolingLayerKernel.cpp",
+ "src/runtime/NEON/functions/NEROIPoolingLayer.cpp"
]
}
},
+ "Scale": {
+ "files": {
+ "common": [
+ "src/cpu/operators/CpuScale.cpp",
+ "src/cpu/kernels/CpuScaleKernel.cpp",
+ "src/runtime/NEON/functions/NEScale.cpp"
+ ],
+ "sve": {
+ "fp16": [ "src/cpu/kernels/scale/sve/fp16.cpp" ],
+ "fp32": [ "src/cpu/kernels/scale/sve/fp32.cpp" ],
+ "integer": [ "src/cpu/kernels/scale/sve/integer.cpp" ],
+ "qasymm8": [ "src/cpu/kernels/scale/sve/qasymm8.cpp" ],
+ "qasymm8_signed": [ "src/cpu/kernels/scale/sve/qasymm8_signed.cpp" ]
+
+ },
+ "neon": {
+ "fp16": [ "src/cpu/kernels/scale/neon/fp16.cpp" ],
+ "integer": [ "src/cpu/kernels/scale/neon/integer.cpp" ],
+ "qasymm8": [ "src/cpu/kernels/scale/neon/qasymm8.cpp" ],
+ "qasymm8_signed": [ "src/cpu/kernels/scale/neon/qasymm8_signed.cpp" ]
+ }
+ }
+ },
"Select": {
"files": {
- "kernel": [
- "src/core/NEON/kernels/NESelectKernel.cpp"
+ "common": [
+ "src/core/NEON/kernels/NESelectKernel.cpp",
+ "src/runtime/NEON/functions/NESelect.cpp"
]
}
},
- "SpaceToBatch": {
+ "Slice": {
+ "deps": [ "StridedSlice" ],
"files": {
- "kernel": [
- "src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp"
- ]
+ "common": [ "src/runtime/NEON/functions/NESlice.cpp" ]
}
},
- "SpaceToDepth": {
+ "Softmax": {
+ "deps": [
+ "Permute"
+ ],
"files": {
- "kernel": [
- "src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp"
+ "common": [
+ "src/cpu/operators/CpuSoftmax.cpp",
+ "src/cpu/kernels/CpuSoftmaxKernel.cpp",
+ "src/runtime/NEON/functions/NESoftmaxLayer.cpp"
+ ],
+ "sve": {
+ "common": [ "src/cpu/kernels/softmax/impl/sve/impl.cpp" ]
+ }
+ }
+ },
+ "SpaceToBatch": {
+ "files": {
+ "common": [
+ "src/core/NEON/kernels/NESpaceToBatchLayerKernel.cpp",
+ "src/runtime/NEON/functions/NESpaceToBatchLayer.cpp"
]
}
},
- "Stack": {
+ "SpaceToDepth": {
"files": {
- "kernel": [
- "src/core/NEON/kernels/NEStackLayerKernel.cpp"
+ "common": [
+ "src/core/NEON/kernels/NESpaceToDepthLayerKernel.cpp",
+ "src/runtime/NEON/functions/NESpaceToDepthLayer.cpp"
]
}
},
- "StridedSlice": {
+ "Split": {
+ "deps": [ "StridedSlice" ],
"files": {
- "kernel": [
- "src/core/NEON/kernels/NEStridedSliceKernel.cpp"
+ "common": [
+ "src/runtime/NEON/functions/NESplit.cpp"
]
}
},
- "Scale": {
+ "Stack": {
"files": {
- "operator": [
- "src/cpu/operators/CpuScale.cpp"
- ],
- "kernel": [
- "src/cpu/kernels/CpuScaleKernel.cpp"
- ],
- "sve": {
- "fp32": [
- "src/cpu/kernels/scale/sve/fp32.cpp"
- ],
- "fp16": [
- "src/cpu/kernels/scale/sve/fp16.cpp"
- ],
- "qasymm8": [
- "src/cpu/kernels/scale/sve/qasymm8.cpp"
- ],
- "qasymm8_signed": [
- "src/cpu/kernels/scale/sve/qasymm8_signed.cpp"
- ],
- "integer": [
- "src/cpu/kernels/scale/sve/integer.cpp"
- ]
- },
- "neon": {
- "fp16": [
- "src/cpu/kernels/scale/neon/fp16.cpp"
- ],
- "qasymm8": [
- "src/cpu/kernels/scale/neon/qasymm8.cpp"
- ],
- "qasymm8_signed": [
- "src/cpu/kernels/scale/neon/qasymm8_signed.cpp"
- ],
- "integer": [
- "src/cpu/kernels/scale/neon/integer.cpp"
- ]
- }
+ "common": [
+ "src/core/NEON/kernels/NEStackLayerKernel.cpp",
+ "src/runtime/NEON/functions/NEStackLayer.cpp"
+ ]
}
},
- "Softmax": {
- "deps": [
- "Permute"
- ],
+ "StridedSlice": {
"files": {
- "operator": [
- "src/cpu/operators/CpuSoftmax.cpp"
- ],
- "kernel": [
- "src/cpu/kernels/CpuSoftmaxKernel.cpp"
- ],
- "sve": {
- "all": [
- "src/cpu/kernels/softmax/impl/sve/impl.cpp"
- ]
- }
+ "common": [
+ "src/core/NEON/kernels/NEStridedSliceKernel.cpp",
+ "src/runtime/NEON/functions/NEStridedSlice.cpp"
+ ]
}
},
"Sub": {
"files": {
- "operator": [
- "src/cpu/operators/CpuSub.cpp"
- ],
- "kernel": [
- "src/cpu/kernels/CpuSubKernel.cpp"
+ "common": [
+ "src/cpu/operators/CpuSub.cpp",
+ "src/cpu/kernels/CpuSubKernel.cpp",
+ "src/runtime/NEON/functions/NEArithmeticSubtraction.cpp"
],
+ "sve": {
+ "qasymm8": [ "src/cpu/kernels/sub/neon/qasymm8.cpp" ],
+ "qasymm8_signed": [ "src/cpu/kernels/sub/neon/qasymm8_signed.cpp" ],
+ "qsymm16": [ "src/cpu/kernels/sub/neon/qsymm16.cpp" ]
+ },
"neon": {
- "qsymm16": [
- "src/cpu/kernels/sub/neon/qsymm16.cpp"
- ],
- "qasymm8": [
- "src/cpu/kernels/sub/neon/qasymm8.cpp"
- ],
- "qasymm8_signed": [
- "src/cpu/kernels/sub/neon/qasymm8_signed.cpp"
- ]
+ "qasymm8": [ "src/cpu/kernels/sub/neon/qasymm8.cpp" ],
+ "qasymm8_signed": [ "src/cpu/kernels/sub/neon/qasymm8_signed.cpp" ],
+ "qsymm16": [ "src/cpu/kernels/sub/neon/qsymm16.cpp" ]
}
}
},
- "Transpose": {
+ "Tile": {
"files": {
- "operator": [
- "src/cpu/operators/CpuTranspose.cpp"
- ],
- "kernel": [
- "src/cpu/kernels/CpuTransposeKernel.cpp"
+ "common": [
+ "src/core/NEON/kernels/NETileKernel.cpp",
+ "src/runtime/NEON/functions/NETile.cpp"
]
}
},
- "Tile": {
+ "Transpose": {
"files": {
- "kernel": [
- "src/core/NEON/kernels/NETileKernel.cpp"
+ "common": [
+ "src/cpu/kernels/CpuTransposeKernel.cpp",
+ "src/cpu/operators/CpuTranspose.cpp",
+ "src/runtime/NEON/functions/NETranspose.cpp"
]
}
},
- "WinogradConvolution": {
- "deps": [
- "Activation",
- "Permute"
- ],
+ "Unstack": {
+ "deps": [ "StridedSlice" ],
"files": {
- "operator": [
- "src/cpu/operators/CpuWinogradConv2d.cpp"
- ],
- "kernel": [
- "src/cpu/kernels/CpuWinogradConv2dKernel.cpp",
- "src/core/NEON/kernels/convolution/winograd/padding.cpp",
- "src/core/NEON/kernels/convolution/winograd/winograd.cpp",
- "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_1x8_fp32_fp32_integers.cpp",
- "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_4x4_fp16_fp16_integers.cpp",
- "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_4x4_fp32_fp32_integers.cpp",
- "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_6x6_fp16_fp16_integers.cpp",
- "src/core/NEON/kernels/convolution/winograd/winograd_transforms/input_6x6_fp32_fp32_integers.cpp",
- "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_2_7_fp32_fp32_integers.cpp",
- "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_2x2_3x3_fp32_fp32_integers.cpp",
- "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_2x2_5x5_fp32_fp32_integers.cpp",
- "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_4_5_fp32_fp32_integers.cpp",
- "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_4x4_3x3_fp16_fp16_integers.cpp",
- "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_4x4_3x3_fp32_fp32_integers.cpp",
- "src/core/NEON/kernels/convolution/winograd/winograd_transforms/output_6_3_fp32_fp32_integers.cpp",
- "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_2_7_fp32_fp32_integers.cpp",
- "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_2x2_3x3_fp32_fp32_integers.cpp",
- "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_2x2_5x5_fp32_fp32_integers.cpp",
- "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_4_5_fp32_fp32_integers.cpp",
- "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_4x4_3x3_fp16_fp16_integers.cpp",
- "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_4x4_3x3_fp32_fp32_integers.cpp",
- "src/core/NEON/kernels/convolution/winograd/winograd_transforms/weights_6_3_fp32_fp32_integers.cpp"
- ]
+ "common": [ "src/runtime/NEON/functions/NEUnstack.cpp" ]
}
}
}
diff --git a/src/core/CL/CLKernels.h b/src/core/CL/CLKernels.h
index f9d560f1b7..0c295aae6a 100644
--- a/src/core/CL/CLKernels.h
+++ b/src/core/CL/CLKernels.h
@@ -47,7 +47,6 @@
#include "src/core/CL/kernels/CLL2NormalizeLayerKernel.h"
#include "src/core/CL/kernels/CLMaxUnpoolingLayerKernel.h"
#include "src/core/CL/kernels/CLMeanStdDevNormalizationKernel.h"
-#include "src/core/CL/kernels/CLMinMaxLayerKernel.h"
#include "src/core/CL/kernels/CLNormalizationLayerKernel.h"
#include "src/core/CL/kernels/CLNormalizePlanarYUVLayerKernel.h"
#include "src/core/CL/kernels/CLPadLayerKernel.h"
diff --git a/src/core/CL/kernels/CLMinMaxLayerKernel.cpp b/src/core/CL/kernels/CLMinMaxLayerKernel.cpp
deleted file mode 100644
index f0202a9c5d..0000000000
--- a/src/core/CL/kernels/CLMinMaxLayerKernel.cpp
+++ /dev/null
@@ -1,169 +0,0 @@
-/*
- * Copyright (c) 2017-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/CL/kernels/CLMinMaxLayerKernel.h"
-
-#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/CL/CLKernelLibrary.h"
-#include "arm_compute/core/CL/ICLTensor.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "src/core/AccessWindowStatic.h"
-#include "src/core/helpers/AutoConfiguration.h"
-#include "src/core/helpers/WindowHelpers.h"
-#include "support/StringSupport.h"
-
-#include <climits>
-
-using namespace arm_compute;
-using namespace arm_compute::misc::shape_calculator;
-
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() < 3);
-
- if(output->tensor_shape().total_size() > 0)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-
- TensorShape output_shape = compute_min_max_shape(input);
-
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
- }
-
- return Status{};
-}
-
-std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
-{
- TensorShape output_shape = compute_min_max_shape(input);
-
- // Output auto initialization if not yet initialized
- auto_init_if_empty(*output, output_shape, 1, input->data_type());
-
- const unsigned int num_elems_processed_per_iteration = 1;
-
- // Configure kernel window
- Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
- AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
- AccessWindowStatic output_access(output, 0, 0, 2, output->dimension(1));
-
- bool window_changed = update_window_and_padding(win, input_access, output_access);
-
- Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
- return std::make_tuple(err, win);
-}
-} // namespace
-
-CLMinMaxLayerKernel::CLMinMaxLayerKernel()
- : _input(nullptr), _output(nullptr)
-{
- _type = CLKernelType::ELEMENTWISE;
-}
-
-void CLMinMaxLayerKernel::configure(const ICLTensor *input, ICLTensor *output)
-{
- configure(CLKernelLibrary::get().get_compile_context(), input, output);
-}
-
-void CLMinMaxLayerKernel::configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info()));
-
- _input = input;
- _output = output;
-
- std::set<std::string> build_opts;
- build_opts.emplace("-DWIDTH=" + support::cpp11::to_string(input->info()->dimension(0)));
- build_opts.emplace("-DHEIGHT=" + support::cpp11::to_string(input->info()->dimension(1)));
- build_opts.emplace("-DDEPTH=" + support::cpp11::to_string(input->info()->dimension(2)));
-
- // Create kernel
- _kernel = create_kernel(compile_context, "minmax_layer", build_opts);
-
- auto win_config = validate_and_configure_window(input->info(), output->info());
-
- ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
-
- ICLKernel::configure_internal(std::get<1>(win_config));
-}
-
-Status CLMinMaxLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output));
- ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get())));
-
- return Status{};
-}
-
-void CLMinMaxLayerKernel::reset(cl::CommandQueue &queue)
-{
- _output->map(queue, true);
-
- Window window_output;
- window_output.use_tensor_dimensions(_output->info()->tensor_shape());
- window_output.set(Window::DimX, Window::Dimension(0, 1, 1));
-
- Iterator output(_output, window_output);
-
- // Reset output
- execute_window_loop(window_output, [&](const Coordinates &)
- {
- auto *ptr = reinterpret_cast<float *>(output.ptr());
- ptr[0] = std::numeric_limits<float>::max();
- ptr[1] = std::numeric_limits<float>::min();
- },
- output);
-
- _output->unmap(queue);
-}
-
-void CLMinMaxLayerKernel::run(const Window &window, cl::CommandQueue &queue)
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window);
-
- Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), 3);
- Window slice = window_collapsed.first_slice_window_3D();
- slice.set(Window::DimX, Window::Dimension(0, 1, 1));
- slice.set(Window::DimY, Window::Dimension(0, 1, 1));
- slice.set(Window::DimZ, Window::Dimension(0, 1, 1));
-
- do
- {
- Window output_slice = slice.shift_dimensions(2);
-
- unsigned int idx = 0;
- // Set inputs
- add_3D_tensor_argument(idx, _input, slice);
- add_1D_tensor_argument(idx, _output, output_slice);
- enqueue(queue, *this, slice, lws_hint());
- }
- while(window_collapsed.slide_window_slice_3D(slice));
-}
diff --git a/src/core/CL/kernels/CLMinMaxLayerKernel.h b/src/core/CL/kernels/CLMinMaxLayerKernel.h
deleted file mode 100644
index aa2ff3f375..0000000000
--- a/src/core/CL/kernels/CLMinMaxLayerKernel.h
+++ /dev/null
@@ -1,87 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#ifndef ARM_COMPUTE_CLMINMAXLAYERKERNEL_H
-#define ARM_COMPUTE_CLMINMAXLAYERKERNEL_H
-
-#include "src/core/CL/ICLKernel.h"
-
-namespace arm_compute
-{
-class ICLTensor;
-
-/** Interface for the kernel to perform min max search on a 3D tensor.
- */
-class CLMinMaxLayerKernel : public ICLKernel
-{
-public:
- /** Default constructor */
- CLMinMaxLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMinMaxLayerKernel(const CLMinMaxLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLMinMaxLayerKernel &operator=(const CLMinMaxLayerKernel &) = delete;
- /** Allow instances of this class to be moved */
- CLMinMaxLayerKernel(CLMinMaxLayerKernel &&) = default;
- /** Allow instances of this class to be moved */
- CLMinMaxLayerKernel &operator=(CLMinMaxLayerKernel &&) = default;
- /** Initialise the kernel's input and output.
- *
- * @param[in] input Input tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches.Data types supported: F32.
- * @param[out] output Output tensor with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor.
- * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32.
- */
- void configure(const ICLTensor *input, ICLTensor *output);
- /** Initialise the kernel's input and output.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches.Data types supported: F32.
- * @param[out] output Output tensor with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor.
- * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, ICLTensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLMinMaxLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: F32.
- * @param[in] output Output tensor info with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor.
- * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
-
- /** Resets global minimum and maximum
- *
- * @param[in,out] queue Command queue on which to map and unmap the min_max tensor
- */
- void reset(cl::CommandQueue &queue);
-
- // Inherited methods overridden:
- void run(const Window &window, cl::CommandQueue &queue) override;
-
-private:
- const ICLTensor *_input;
- ICLTensor *_output;
-};
-} // namespace arm_compute
-#endif /*ARM_COMPUTE_CLMINMAXLAYERKERNEL_H */
diff --git a/src/core/NEON/NEKernels.h b/src/core/NEON/NEKernels.h
index 6d45a9d80c..af301c8d16 100644
--- a/src/core/NEON/NEKernels.h
+++ b/src/core/NEON/NEKernels.h
@@ -47,7 +47,6 @@
#include "src/core/NEON/kernels/NELogicalKernel.h"
#include "src/core/NEON/kernels/NEMaxUnpoolingLayerKernel.h"
#include "src/core/NEON/kernels/NEMeanStdDevNormalizationKernel.h"
-#include "src/core/NEON/kernels/NEMinMaxLayerKernel.h"
#include "src/core/NEON/kernels/NENormalizationLayerKernel.h"
#include "src/core/NEON/kernels/NEPadLayerKernel.h"
#include "src/core/NEON/kernels/NEPriorBoxLayerKernel.h"
diff --git a/src/core/NEON/kernels/NEMinMaxLayerKernel.cpp b/src/core/NEON/kernels/NEMinMaxLayerKernel.cpp
deleted file mode 100644
index 5ea8947fa0..0000000000
--- a/src/core/NEON/kernels/NEMinMaxLayerKernel.cpp
+++ /dev/null
@@ -1,224 +0,0 @@
-/*
- * Copyright (c) 2017-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "src/core/NEON/kernels/NEMinMaxLayerKernel.h"
-
-#include "arm_compute/core/Coordinates.h"
-#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Helpers.h"
-#include "arm_compute/core/IAccessWindow.h"
-#include "arm_compute/core/ITensor.h"
-#include "arm_compute/core/TensorInfo.h"
-#include "arm_compute/core/Types.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/Window.h"
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
-#include "src/core/helpers/AutoConfiguration.h"
-#include "src/core/helpers/WindowHelpers.h"
-
-#include <algorithm>
-#include <arm_neon.h>
-#include <climits>
-#include <cstddef>
-
-using namespace arm_compute::misc::shape_calculator;
-
-namespace arm_compute
-{
-namespace
-{
-Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output)
-{
- ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON(input->num_dimensions() < 3);
-
- if(output->tensor_shape().total_size() > 0)
- {
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output);
-
- TensorShape output_shape = compute_min_max_shape(input);
-
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(output->tensor_shape(), output_shape);
- }
-
- return Status{};
-}
-
-std::tuple<Status, Window> validate_and_configure_window(ITensorInfo *input, ITensorInfo *output)
-{
- TensorShape output_shape = compute_min_max_shape(input);
-
- // Output auto initialization if not yet initialized
- auto_init_if_empty(*output, output_shape, 1, input->data_type());
-
- constexpr unsigned int num_elems_processed_per_iteration = 1;
-
- // Configure kernel window
- Window win = calculate_max_window(*input, Steps(num_elems_processed_per_iteration));
- AccessWindowHorizontal input_access(input, 0, num_elems_processed_per_iteration);
- AccessWindowHorizontal output_access(output, 0, 2);
-
- bool window_changed = update_window_and_padding(win, input_access, output_access);
-
- Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{};
- return std::make_tuple(err, win);
-}
-} // namespace
-
-NEMinMaxLayerKernel::NEMinMaxLayerKernel()
- : _input(nullptr), _output(nullptr), _mtx()
-{
-}
-
-void NEMinMaxLayerKernel::configure(const ITensor *input, ITensor *output)
-{
- ARM_COMPUTE_ERROR_ON_NULLPTR(input, output);
- ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info()));
-
- _input = input;
- _output = output;
-
- auto win_config = validate_and_configure_window(input->info(), output->info());
-
- ARM_COMPUTE_ERROR_THROW_ON(std::get<0>(win_config));
-
- INEKernel::configure(std::get<1>(win_config));
-}
-
-Status NEMinMaxLayerKernel::validate(const ITensorInfo *input, const ITensorInfo *output)
-{
- ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output));
- ARM_COMPUTE_RETURN_ON_ERROR(std::get<0>(validate_and_configure_window(input->clone().get(), output->clone().get())));
-
- return Status{};
-}
-
-void NEMinMaxLayerKernel::run(const Window &window, const ThreadInfo &info)
-{
- ARM_COMPUTE_UNUSED(info);
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
- ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(INEKernel::window(), window);
-
- const int x_start = window.x().start();
- const int x_end = window.x().end();
-
- Window window_output;
- window_output.use_tensor_dimensions(_output->info()->tensor_shape());
- window_output.set(Window::DimX, Window::Dimension(0, 1, 1));
-
- // Handle X dimension manually to split into two loops
- // First one will use vector operations, second one processes the left over pixels
- Window window_input(window);
- window_input.set(Window::DimX, Window::Dimension(0, 1, 1));
- window_input.set(3, Window::Dimension(0, 1, 1));
-
- Iterator input(_input, window_input);
- Iterator output(_output, window_output);
-
- execute_window_loop(window_output, [&](const Coordinates & id_batch)
- {
- float32x2_t carry_min = vdup_n_f32(std::numeric_limits<float>::max());
- float32x2_t carry_max = vdup_n_f32(std::numeric_limits<float>::lowest());
-
- float carry_min_scalar = std::numeric_limits<float>::max();
- float carry_max_scalar = std::numeric_limits<float>::lowest();
-
- execute_window_loop(window_input, [&](const Coordinates &)
- {
- int x = x_start;
- const auto in_ptr = reinterpret_cast<const float *>(input.ptr() + id_batch[1] * _input->info()->strides_in_bytes()[3]);
-
- // Vector loop
- for(; x <= x_end - 8; x += 8)
- {
- const float32x4x2_t pixels = vld2q_f32(in_ptr + x);
- const float32x4_t tmp_min1 = vminq_f32(pixels.val[0], pixels.val[1]);
- const float32x4_t tmp_max1 = vmaxq_f32(pixels.val[0], pixels.val[1]);
- const float32x2_t tmp_min2 = vmin_f32(vget_high_f32(tmp_min1), vget_low_f32(tmp_min1));
- const float32x2_t tmp_max2 = vmax_f32(vget_high_f32(tmp_max1), vget_low_f32(tmp_max1));
- carry_min = vmin_f32(tmp_min2, carry_min);
- carry_max = vmax_f32(tmp_max2, carry_max);
- }
-
- // Process leftover pixels
- for(; x < x_end; ++x)
- {
- const float pixel = in_ptr[x];
- carry_min_scalar = std::min(pixel, carry_min_scalar);
- carry_max_scalar = std::max(pixel, carry_max_scalar);
- }
- },
- input);
-
- // Reduce result
- carry_min = vpmin_f32(carry_min, carry_min);
- carry_max = vpmax_f32(carry_max, carry_max);
- carry_min = vpmin_f32(carry_min, carry_min);
- carry_max = vpmax_f32(carry_max, carry_max);
-
- // Extract max/min values
- const float min_i = std::min(vget_lane_f32(carry_min, 0), carry_min_scalar);
- const float max_i = std::max(vget_lane_f32(carry_max, 0), carry_max_scalar);
-
- auto out_ptr = reinterpret_cast<float *>(output.ptr());
-
- // Perform reduction of local min/max values
- update_min_max(out_ptr, min_i, max_i);
- },
- output);
-}
-
-void NEMinMaxLayerKernel::reset()
-{
- ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this);
-
- float32x2_t reset_values = vdup_n_f32(0.0f);
- reset_values = vset_lane_f32(std::numeric_limits<float>::max(), reset_values, 0);
- reset_values = vset_lane_f32(std::numeric_limits<float>::lowest(), reset_values, 1);
-
- Window window_output;
- window_output.use_tensor_dimensions(_output->info()->tensor_shape());
- window_output.set(Window::DimX, Window::Dimension(0, 1, 1));
-
- Iterator output(_output, window_output);
-
- execute_window_loop(window_output, [&](const Coordinates &)
- {
- vst1_f32(reinterpret_cast<float *>(output.ptr()), reset_values);
- },
- output);
-}
-
-void NEMinMaxLayerKernel::update_min_max(float *out_ptr, float min, float max)
-{
- arm_compute::lock_guard<Mutex> lock(_mtx);
-
- const float32x2_t old_min = vld1_dup_f32(out_ptr);
- const float32x2_t old_max = vld1_dup_f32(out_ptr + 1);
- const float32x2_t new_min = vmin_f32(vdup_n_f32(min), old_min);
- const float32x2_t new_max = vmax_f32(vdup_n_f32(max), old_max);
-
- vst1_f32(out_ptr, vzip_f32(new_min, new_max).val[0]);
-}
-} // namespace arm_compute
diff --git a/src/core/NEON/kernels/NEMinMaxLayerKernel.h b/src/core/NEON/kernels/NEMinMaxLayerKernel.h
deleted file mode 100644
index b4852ad9f2..0000000000
--- a/src/core/NEON/kernels/NEMinMaxLayerKernel.h
+++ /dev/null
@@ -1,90 +0,0 @@
-/*
- * Copyright (c) 2017-2020 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef ARM_COMPUTE_NEMINMAXLAYERKERNEL_H
-#define ARM_COMPUTE_NEMINMAXLAYERKERNEL_H
-
-#include "src/core/NEON/INEKernel.h"
-#include "support/Mutex.h"
-
-#include <cstdint>
-
-namespace arm_compute
-{
-class ITensor;
-
-/** Interface for the kernel to perform min max search on a 3D tensor. */
-class NEMinMaxLayerKernel : public INEKernel
-{
-public:
- const char *name() const override
- {
- return "NEMinMaxLayerKernel";
- }
- /** Default constructor */
- NEMinMaxLayerKernel();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMinMaxLayerKernel(const NEMinMaxLayerKernel &) = delete;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEMinMaxLayerKernel &operator=(const NEMinMaxLayerKernel &) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEMinMaxLayerKernel(NEMinMaxLayerKernel &&) = delete;
- /** Prevent instances of this class from being moved (As this class contains non movable objects) */
- NEMinMaxLayerKernel &operator=(NEMinMaxLayerKernel &&) = delete;
- /** Default destructor */
- ~NEMinMaxLayerKernel() = default;
-
- /** Initialise the kernel's input and outputs.
- *
- * @note output[0] = minimum
- * @note output[1] = maximum
- *
- * @param[in] input Input tensor with at least 3 dimensions. The dimensions over the third will be interpreted as batches. Data type supported: F32.
- * @param[out] output Output tensor with shape [2, batches, ...] which stores the minimum and maximum value for each 3D input tensor.
- * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32
- */
- void configure(const ITensor *input, ITensor *output);
- /** Static function to check if given info will lead to a valid configuration of @ref CLMinMaxLayerKernel
- *
- * @param[in] input Input tensor info. Data types supported: F32.
- * @param[in] output Output tensor info with shape [2, batches, ...] which stores the minimum and maximum values for each 3D input tensor.
- * The dimensions over the second must match the batched dimensions of the input tensor. Data types supported: F32.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output);
- /** Resets global minimum and maximum. */
- void reset();
-
- // Inherited methods overridden:
- void run(const Window &window, const ThreadInfo &info) override;
-
-private:
- void update_min_max(float *out_ptr, float min, float max);
- const ITensor *_input;
- ITensor *_output;
- arm_compute::Mutex _mtx;
-};
-} // namespace arm_compute
-#endif /* ARM_COMPUTE_NEMINMAXLAYERKERNEL_H */
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp
index f38912d257..1c4c7576f5 100644
--- a/src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp
+++ b/src/core/NEON/kernels/arm_conv/depthwise/depthwise_s8q.cpp
@@ -62,13 +62,11 @@ namespace depthwise {
namespace
{
-
bool qp_weights_are_symmetric(const DepthwiseArgs &, const void *_qp)
{
const auto qp = static_cast<const arm_gemm::Requantize32 *>(_qp);
return qp->b_offset == 0;
}
-
}
static const DepthwiseImplementation<int8_t, int8_t, int8_t, Requantize32> depthwise_s8q_methods[] = {
diff --git a/src/runtime/CL/functions/CLFillBorder.cpp b/src/runtime/CL/functions/CLFillBorder.cpp
deleted file mode 100644
index de9b857977..0000000000
--- a/src/runtime/CL/functions/CLFillBorder.cpp
+++ /dev/null
@@ -1,45 +0,0 @@
-/*
- * Copyright (c) 2016-2021 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-#include "arm_compute/runtime/CL/functions/CLFillBorder.h"
-
-#include "src/core/CL/kernels/CLFillBorderKernel.h"
-
-#include "src/common/utils/Log.h"
-
-#include <utility>
-
-using namespace arm_compute;
-
-void CLFillBorder::configure(ICLTensor *tensor, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value)
-{
- configure(CLKernelLibrary::get().get_compile_context(), tensor, border_width, border_mode, constant_border_value);
-}
-
-void CLFillBorder::configure(const CLCompileContext &compile_context, ICLTensor *tensor, unsigned int border_width, BorderMode border_mode, const PixelValue &constant_border_value)
-{
- ARM_COMPUTE_LOG_PARAMS(tensor, border_width, border_mode, constant_border_value);
- auto k = std::make_unique<CLFillBorderKernel>();
- k->configure(compile_context, tensor, BorderSize(border_width), border_mode, constant_border_value);
- _kernel = std::move(k);
-}
diff --git a/tests/framework/instruments/OpenCLTimer.cpp b/tests/framework/instruments/OpenCLTimer.cpp
index 45eb4c5c60..e9f945bd95 100644
--- a/tests/framework/instruments/OpenCLTimer.cpp
+++ b/tests/framework/instruments/OpenCLTimer.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2019 Arm Limited.
+ * Copyright (c) 2017-2019, 2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -54,7 +54,13 @@ std::string OpenCLClock<output_timestamps>::id() const
template <bool output_timestamps>
OpenCLClock<output_timestamps>::OpenCLClock(ScaleFactor scale_factor)
- : _kernels(), _real_function(nullptr), _real_graph_function(nullptr), _prefix(), _timer_enabled(false)
+ : _kernels(),
+ _real_function(nullptr),
+#ifdef ARM_COMPUTE_GRAPH_ENABLED
+ _real_graph_function(nullptr),
+#endif /* ARM_COMPUTE_GRAPH_ENABLED */
+ _prefix(),
+ _timer_enabled(false)
{
auto q = CLScheduler::get().queue();
cl_command_queue_properties props = q.getInfo<CL_QUEUE_PROPERTIES>();
@@ -91,19 +97,17 @@ void OpenCLClock<output_timestamps>::test_start()
{
// Start intercepting enqueues:
ARM_COMPUTE_ERROR_ON(_real_function != nullptr);
- ARM_COMPUTE_ERROR_ON(_real_graph_function != nullptr);
- _real_function = CLSymbols::get().clEnqueueNDRangeKernel_ptr;
- _real_graph_function = graph::TaskExecutor::get().execute_function;
- auto interceptor = [this](
- cl_command_queue command_queue,
- cl_kernel kernel,
- cl_uint work_dim,
- const size_t *gwo,
- const size_t *gws,
- const size_t *lws,
- cl_uint num_events_in_wait_list,
- const cl_event * event_wait_list,
- cl_event * event)
+ _real_function = CLSymbols::get().clEnqueueNDRangeKernel_ptr;
+ auto interceptor = [this](
+ cl_command_queue command_queue,
+ cl_kernel kernel,
+ cl_uint work_dim,
+ const size_t *gwo,
+ const size_t *gws,
+ const size_t *lws,
+ cl_uint num_events_in_wait_list,
+ const cl_event * event_wait_list,
+ cl_event * event)
{
if(this->_timer_enabled)
{
@@ -138,7 +142,11 @@ void OpenCLClock<output_timestamps>::test_start()
return this->_real_function(command_queue, kernel, work_dim, gwo, gws, lws, num_events_in_wait_list, event_wait_list, event);
}
};
+ CLSymbols::get().clEnqueueNDRangeKernel_ptr = interceptor;
+#ifdef ARM_COMPUTE_GRAPH_ENABLED
+ ARM_COMPUTE_ERROR_ON(_real_graph_function != nullptr);
+ _real_graph_function = graph::TaskExecutor::get().execute_function;
// Start intercepting tasks:
auto task_interceptor = [this](graph::ExecutionTask & task)
{
@@ -153,9 +161,8 @@ void OpenCLClock<output_timestamps>::test_start()
this->_real_graph_function(task);
this->_prefix = "";
};
-
- CLSymbols::get().clEnqueueNDRangeKernel_ptr = interceptor;
graph::TaskExecutor::get().execute_function = task_interceptor;
+#endif /* ARM_COMPUTE_GRAPH_ENABLED */
}
template <bool output_timestamps>
@@ -175,9 +182,11 @@ void OpenCLClock<output_timestamps>::test_stop()
{
// Restore real function
CLSymbols::get().clEnqueueNDRangeKernel_ptr = _real_function;
+ _real_function = nullptr;
+#ifdef ARM_COMPUTE_GRAPH_ENABLED
graph::TaskExecutor::get().execute_function = _real_graph_function;
_real_graph_function = nullptr;
- _real_function = nullptr;
+#endif /* ARM_COMPUTE_GRAPH_ENABLED */
}
template <bool output_timestamps>
diff --git a/tests/framework/instruments/OpenCLTimer.h b/tests/framework/instruments/OpenCLTimer.h
index 9904035c20..1812272435 100644
--- a/tests/framework/instruments/OpenCLTimer.h
+++ b/tests/framework/instruments/OpenCLTimer.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2017-2018 Arm Limited.
+ * Copyright (c) 2017-2018, 2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -67,9 +67,11 @@ private:
};
std::list<kernel_info> _kernels;
std::function<decltype(clEnqueueNDRangeKernel)> _real_function;
- std::function<decltype(graph::execute_task)> _real_graph_function;
- std::string _prefix;
- bool _timer_enabled;
+#ifdef ARM_COMPUTE_GRAPH_ENABLED
+ std::function<decltype(graph::execute_task)> _real_graph_function;
+#endif /* ARM_COMPUTE_GRAPH_ENABLED */
+ std::string _prefix;
+ bool _timer_enabled;
#endif /* ARM_COMPUTE_CL */
private:
diff --git a/tests/framework/instruments/SchedulerTimer.cpp b/tests/framework/instruments/SchedulerTimer.cpp
index 35f960d368..b753485351 100644
--- a/tests/framework/instruments/SchedulerTimer.cpp
+++ b/tests/framework/instruments/SchedulerTimer.cpp
@@ -129,16 +129,24 @@ protected:
private:
std::list<struct SchedulerClock<output_timestamps>::kernel_info> &_kernels;
- std::map<std::string, SchedulerTimer::LayerData> &_layer_data_map;
- IScheduler &_real_scheduler;
- WallClock<output_timestamps> _timer;
- std::string _prefix;
+ std::map<std::string, SchedulerTimer::LayerData> &_layer_data_map;
+ IScheduler &_real_scheduler;
+ WallClock<output_timestamps> _timer;
+ std::string _prefix;
};
template <bool output_timestamps>
SchedulerClock<output_timestamps>::SchedulerClock(ScaleFactor scale_factor)
- : _kernels(), _layer_data_map(), _real_scheduler(nullptr), _real_scheduler_type(), _real_graph_function(nullptr),
- _scale_factor(scale_factor), _interceptor(nullptr), _scheduler_users()
+ : _kernels(),
+ _layer_data_map(),
+ _real_scheduler(nullptr),
+ _real_scheduler_type(),
+#ifdef ARM_COMPUTE_GRAPH_ENABLED
+ _real_graph_function(nullptr),
+#endif /* ARM_COMPUTE_GRAPH_ENABLED */
+ _scale_factor(scale_factor),
+ _interceptor(nullptr),
+ _scheduler_users()
{
if(instruments_info != nullptr)
{
@@ -149,6 +157,7 @@ SchedulerClock<output_timestamps>::SchedulerClock(ScaleFactor scale_factor)
template <bool output_timestamps>
void SchedulerClock<output_timestamps>::test_start()
{
+#ifdef ARM_COMPUTE_GRAPH_ENABLED
// Start intercepting tasks:
ARM_COMPUTE_ERROR_ON(_real_graph_function != nullptr);
_real_graph_function = graph::TaskExecutor::get().execute_function;
@@ -182,6 +191,7 @@ void SchedulerClock<output_timestamps>::test_start()
scheduler->set_prefix("");
}
};
+#endif /* ARM_COMPUTE_GRAPH_ENABLED */
ARM_COMPUTE_ERROR_ON(_real_scheduler != nullptr);
_real_scheduler_type = Scheduler::get_type();
@@ -191,7 +201,9 @@ void SchedulerClock<output_timestamps>::test_start()
_real_scheduler = &Scheduler::get();
_interceptor = std::make_shared<Interceptor<output_timestamps>>(_kernels, _layer_data_map, *_real_scheduler, _scale_factor);
Scheduler::set(std::static_pointer_cast<IScheduler>(_interceptor));
+#ifdef ARM_COMPUTE_GRAPH_ENABLED
graph::TaskExecutor::get().execute_function = task_interceptor;
+#endif /* ARM_COMPUTE_GRAPH_ENABLED */
// Create an interceptor for each scheduler
// TODO(COMPID-2638) : Allow multiple schedulers, now it assumes the same scheduler is used.
@@ -217,10 +229,12 @@ void SchedulerClock<output_timestamps>::test_stop()
{
// Restore real scheduler
Scheduler::set(_real_scheduler_type);
- _real_scheduler = nullptr;
- _interceptor = nullptr;
+ _real_scheduler = nullptr;
+ _interceptor = nullptr;
+#ifdef ARM_COMPUTE_GRAPH_ENABLED
graph::TaskExecutor::get().execute_function = _real_graph_function;
_real_graph_function = nullptr;
+#endif /* ARM_COMPUTE_GRAPH_ENABLED */
// Restore schedulers
std::for_each(std::begin(_scheduler_users), std::end(_scheduler_users),
@@ -270,9 +284,9 @@ Instrument::MeasurementsMap SchedulerClock<output_timestamps>::measurements() co
}
template <bool output_timestamps>
-std::string SchedulerClock<output_timestamps>::instrument_header() const
+std::string SchedulerClock<output_timestamps>::instrument_header() const
{
- std::string output{""};
+ std::string output{ "" };
output += R"("layer_data" : {)";
for(auto i_it = _layer_data_map.cbegin(), i_end = _layer_data_map.cend(); i_it != i_end; ++i_it)
{
diff --git a/tests/framework/instruments/SchedulerTimer.h b/tests/framework/instruments/SchedulerTimer.h
index 9cc0381a9a..c437f2717c 100644
--- a/tests/framework/instruments/SchedulerTimer.h
+++ b/tests/framework/instruments/SchedulerTimer.h
@@ -97,14 +97,16 @@ public:
};
private:
- std::list<kernel_info> _kernels;
- std::map<std::string, LayerData> _layer_data_map;
- IScheduler *_real_scheduler;
- Scheduler::Type _real_scheduler_type;
+ std::list<kernel_info> _kernels;
+ std::map<std::string, LayerData> _layer_data_map;
+ IScheduler *_real_scheduler;
+ Scheduler::Type _real_scheduler_type;
+#ifdef ARM_COMPUTE_GRAPH_ENABLED
std::function<decltype(graph::execute_task)> _real_graph_function;
- ScaleFactor _scale_factor;
- std::shared_ptr<IScheduler> _interceptor;
- std::vector<ISchedulerUser *> _scheduler_users;
+#endif /* ARM_COMPUTE_GRAPH_ENABLED */
+ ScaleFactor _scale_factor;
+ std::shared_ptr<IScheduler> _interceptor;
+ std::vector<ISchedulerUser *> _scheduler_users;
};
using SchedulerTimer = SchedulerClock<false>;