aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Giorgio Arena <giorgio.arena@arm.com> 2022-03-30 12:23:10 +0100
committer Giorgio Arena <giorgio.arena@arm.com> 2022-03-31 11:08:38 +0000
commit 892b70a8bb338f0c10c06112c41f6c8e8c3495f9 (patch)
tree 0a855ad5854335a6c9c48ad30312d3e0b0f0235d
parent 36a1c119cfabfe3da6ea3ef69a0a21929cb418d6 (diff)
download ComputeLibrary-892b70a8bb338f0c10c06112c41f6c8e8c3495f9.tar.gz
Fix embedded kernel header inclusion for dynamic fusion
Resolves: COMPMID-5155
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Change-Id: Ic16fb12bfa748cac92d73019d08eea53bf470c12
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7354
Reviewed-by: SiCong Li <sicong.li@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r-- SConscript 14
-rw-r--r-- src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h 5
-rw-r--r-- src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClGemmNativeKernelComponent.cpp 2
-rw-r--r-- src/gpu/cl/ClKernelLibrary.cpp 12
-rw-r--r-- tests/SConscript 6
5 files changed, 26 insertions, 13 deletions
diff --git a/SConscript b/SConscript
index dd0f1488ac..3e5fa2f449 100644
--- a/SConscript
+++ b/SConscript
@@ -74,7 +74,7 @@ def build_obj_list(arch_info, sources, static=False):
objs = tmp_env.StaticObject(sources)
else:
objs = tmp_env.SharedObject(sources)
-
+
tmp_env.Default(objs)
return objs
@@ -96,7 +96,7 @@ def build_lib_objects():
# Build all the common files for the base architecture
lib_static_objs += build_obj_list(filedefs["armv8.2-a"], lib_files, static=True)
lib_shared_objs += build_obj_list(filedefs["armv8.2-a"], lib_files, static=False)
-
+
# Build the SVE specific files
lib_static_objs += build_obj_list(filedefs["armv8.2-a-sve"], lib_files_sve, static=True)
lib_shared_objs += build_obj_list(filedefs["armv8.2-a-sve"], lib_files_sve, static=False)
@@ -325,7 +325,7 @@ cpp_compiler = os.environ.get('CXX', default_cpp_compiler)
# Generate embed files
generate_embed = [ version_file ]
if env['opencl'] and env['embed_kernels']:
-
+
# Header files
cl_helper_files = [ 'src/core/CL/cl_kernels/activation_float_helpers.h',
'src/core/CL/cl_kernels/activation_quant_helpers.h',
@@ -496,12 +496,8 @@ lib_files = filelist['common']
# Experimental files
# Dynamic fusion
if env['experimental_dynamic_fusion']:
- if env['embed_kernels']:
- # COMPMID-5176
- print("Dynamic fusion with embed_kernels=1 not supported. Skipping.")
- else:
- lib_files += filelist['experimental']['dynamic_fusion']
- arm_compute_env.Append(CPPDEFINES = ['ENABLE_EXPERIMENTAL_DYNAMIC_FUSION'])
+ lib_files += filelist['experimental']['dynamic_fusion']
+ arm_compute_env.Append(CPPDEFINES = ['ENABLE_EXPERIMENTAL_DYNAMIC_FUSION'])
# Logging files
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h
index 6e1291cdd5..4c720ea1aa 100644
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h
+++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h
@@ -27,6 +27,7 @@
#define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMMON_H
#include "arm_compute/core/CL/CLCompileContext.h"
+#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/GPUTarget.h"
#include "src/core/common/Macros.h"
@@ -494,7 +495,11 @@ public:
for(auto &header : headers_list)
{
+#if defined(EMBEDDED_KERNELS)
+ code += CLKernelLibrary::get().get_program(header).first;
+#else // defined(EMBEDDED_KERNELS)
code += "#include \"" + header + "\"\n";
+#endif // defined(EMBEDDED_KERNELS)
}
for(auto &macros : additional_macros)
diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClGemmNativeKernelComponent.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClGemmNativeKernelComponent.cpp
index 4bf0b76c3a..7d23128276 100644
--- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClGemmNativeKernelComponent.cpp
+++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClGemmNativeKernelComponent.cpp
@@ -44,7 +44,7 @@ ComponentType ClGemmNativeKernelComponent::get_component_type() const
std::set<std::string> ClGemmNativeKernelComponent::get_headers_list() const
{
- return std::set<std::string> { "./common/experimental/gemm_fused_post_ops/act_eltwise_op_act/fp_post_ops_act_eltwise_op_act.h", "gemm_helpers.h", "repeat.h" };
+ return std::set<std::string> { "common/experimental/gemm_fused_post_ops/act_eltwise_op_act/fp_post_ops_act_eltwise_op_act.h", "gemm_helpers.h", "repeat.h" };
}
Window ClGemmNativeKernelComponent::get_window() const
diff --git a/src/gpu/cl/ClKernelLibrary.cpp b/src/gpu/cl/ClKernelLibrary.cpp
index a5d37f49c4..5c17b43b8f 100644
--- a/src/gpu/cl/ClKernelLibrary.cpp
+++ b/src/gpu/cl/ClKernelLibrary.cpp
@@ -587,6 +587,10 @@ const std::map<std::string, std::string> ClKernelLibrary::_program_source_map =
#include "./cl_kernels/common/gemm_utils.clembed"
},
{
+ "common/experimental/gemm_fused_post_ops/act_eltwise_op_act/fp_post_ops_act_eltwise_op_act.h",
+#include "./cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/fp_post_ops_act_eltwise_op_act.hembed"
+ },
+ {
"common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_native.cl",
#include "./cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_native.clembed"
},
@@ -615,6 +619,10 @@ const std::map<std::string, std::string> ClKernelLibrary::_program_source_map =
#include "./cl_kernels/common/generate_proposals_quantized.clembed"
},
{
+ "gemm_helpers.h",
+#include "./cl_kernels/gemm_helpers.hembed"
+ },
+ {
"helpers.h",
#include "./cl_kernels/helpers.hembed"
},
@@ -623,6 +631,10 @@ const std::map<std::string, std::string> ClKernelLibrary::_program_source_map =
#include "./cl_kernels/helpers_asymm.hembed"
},
{
+ "repeat.h",
+#include "./cl_kernels/repeat.hembed"
+ },
+ {
"common/instance_normalization.cl",
#include "./cl_kernels/common/instance_normalization.clembed"
},
diff --git a/tests/SConscript b/tests/SConscript
index cfe3bc5e82..62fa4fce11 100644
--- a/tests/SConscript
+++ b/tests/SConscript
@@ -114,8 +114,9 @@ filter_pattern = test_env['test_filter']
files_validation += Glob('validation/CPP/' + filter_pattern)
if env['opencl']:
- if env['experimental_dynamic_fusion'] and not env['embed_kernels']:
+ if env['experimental_dynamic_fusion']:
test_env.Append(CPPDEFINES = ['ENABLE_EXPERIMENTAL_DYNAMIC_FUSION'])
+ files_validation += Glob('validation/CL/UNIT/dynamic_fusion/' + filter_pattern)
filter_pattern = test_env['test_filter']
@@ -128,8 +129,7 @@ if env['opencl']:
files_validation += Glob('validation/CL/*/' + filter_pattern)
files_validation += Glob('validation/CL/' + filter_pattern)
- if env['experimental_dynamic_fusion'] and not env['embed_kernels']:
- files_validation += Glob('validation/CL/UNIT/dynamic_fusion/' + filter_pattern)
+
if env['external_tests_dir']:
files_validation += Glob(env['external_tests_dir'] + '/tests/validation/CL/' + filter_pattern)
files_validation += Glob('validation/gpu/unit/*.cpp')