From 892b70a8bb338f0c10c06112c41f6c8e8c3495f9 Mon Sep 17 00:00:00 2001 From: Giorgio Arena Date: Wed, 30 Mar 2022 12:23:10 +0100 Subject: Fix embedded kernel header inclusion for dynamic fusion Resolves: COMPMID-5155 Signed-off-by: Giorgio Arena Change-Id: Ic16fb12bfa748cac92d73019d08eea53bf470c12 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/7354 Reviewed-by: SiCong Li Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins --- SConscript | 14 +++++--------- .../dynamic_fusion/ClKernelBuildingImpl/Common.h | 5 +++++ .../components/ClGemmNativeKernelComponent.cpp | 2 +- src/gpu/cl/ClKernelLibrary.cpp | 12 ++++++++++++ tests/SConscript | 6 +++--- 5 files changed, 26 insertions(+), 13 deletions(-) diff --git a/SConscript b/SConscript index dd0f1488ac..3e5fa2f449 100644 --- a/SConscript +++ b/SConscript @@ -74,7 +74,7 @@ def build_obj_list(arch_info, sources, static=False): objs = tmp_env.StaticObject(sources) else: objs = tmp_env.SharedObject(sources) - + tmp_env.Default(objs) return objs @@ -96,7 +96,7 @@ def build_lib_objects(): # Build all the common files for the base architecture lib_static_objs += build_obj_list(filedefs["armv8.2-a"], lib_files, static=True) lib_shared_objs += build_obj_list(filedefs["armv8.2-a"], lib_files, static=False) - + # Build the SVE specific files lib_static_objs += build_obj_list(filedefs["armv8.2-a-sve"], lib_files_sve, static=True) lib_shared_objs += build_obj_list(filedefs["armv8.2-a-sve"], lib_files_sve, static=False) @@ -325,7 +325,7 @@ cpp_compiler = os.environ.get('CXX', default_cpp_compiler) # Generate embed files generate_embed = [ version_file ] if env['opencl'] and env['embed_kernels']: - + # Header files cl_helper_files = [ 'src/core/CL/cl_kernels/activation_float_helpers.h', 'src/core/CL/cl_kernels/activation_quant_helpers.h', @@ -496,12 +496,8 @@ lib_files = filelist['common'] # Experimental files # Dynamic fusion if env['experimental_dynamic_fusion']: - if env['embed_kernels']: - # COMPMID-5176 - 
print("Dynamic fusion with embed_kernels=1 not supported. Skipping.") - else: - lib_files += filelist['experimental']['dynamic_fusion'] - arm_compute_env.Append(CPPDEFINES = ['ENABLE_EXPERIMENTAL_DYNAMIC_FUSION']) + lib_files += filelist['experimental']['dynamic_fusion'] + arm_compute_env.Append(CPPDEFINES = ['ENABLE_EXPERIMENTAL_DYNAMIC_FUSION']) # Logging files diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h index 6e1291cdd5..4c720ea1aa 100644 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h +++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h @@ -27,6 +27,7 @@ #define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMMON_H #include "arm_compute/core/CL/CLCompileContext.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/GPUTarget.h" #include "src/core/common/Macros.h" @@ -494,7 +495,11 @@ public: for(auto &header : headers_list) { +#if defined(EMBEDDED_KERNELS) + code += CLKernelLibrary::get().get_program(header).first; +#else // defined(EMBEDDED_KERNELS) code += "#include \"" + header + "\"\n"; +#endif // defined(EMBEDDED_KERNELS) } for(auto &macros : additional_macros) diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClGemmNativeKernelComponent.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClGemmNativeKernelComponent.cpp index 4bf0b76c3a..7d23128276 100644 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClGemmNativeKernelComponent.cpp +++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClGemmNativeKernelComponent.cpp @@ -44,7 +44,7 @@ ComponentType ClGemmNativeKernelComponent::get_component_type() const std::set ClGemmNativeKernelComponent::get_headers_list() const { - return std::set {
"./common/experimental/gemm_fused_post_ops/act_eltwise_op_act/fp_post_ops_act_eltwise_op_act.h", "gemm_helpers.h", "repeat.h" }; + return std::set { "common/experimental/gemm_fused_post_ops/act_eltwise_op_act/fp_post_ops_act_eltwise_op_act.h", "gemm_helpers.h", "repeat.h" }; } Window ClGemmNativeKernelComponent::get_window() const diff --git a/src/gpu/cl/ClKernelLibrary.cpp b/src/gpu/cl/ClKernelLibrary.cpp index a5d37f49c4..5c17b43b8f 100644 --- a/src/gpu/cl/ClKernelLibrary.cpp +++ b/src/gpu/cl/ClKernelLibrary.cpp @@ -585,6 +585,10 @@ const std::map ClKernelLibrary::_program_source_map = { "common/gemm_utils.cl", #include "./cl_kernels/common/gemm_utils.clembed" + }, + { + "common/experimental/gemm_fused_post_ops/act_eltwise_op_act/fp_post_ops_act_eltwise_op_act.h", +#include "./cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/fp_post_ops_act_eltwise_op_act.hembed" }, { "common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_native.cl", @@ -613,6 +617,10 @@ const std::map ClKernelLibrary::_program_source_map = { "common/generate_proposals_quantized.cl", #include "./cl_kernels/common/generate_proposals_quantized.clembed" + }, + { + "gemm_helpers.h", +#include "./cl_kernels/gemm_helpers.hembed" }, { "helpers.h", @@ -621,6 +629,10 @@ const std::map ClKernelLibrary::_program_source_map = { "helpers_asymm.h", #include "./cl_kernels/helpers_asymm.hembed" + }, + { + "repeat.h", +#include "./cl_kernels/repeat.hembed" }, { "common/instance_normalization.cl", diff --git a/tests/SConscript b/tests/SConscript index cfe3bc5e82..62fa4fce11 100644 --- a/tests/SConscript +++ b/tests/SConscript @@ -114,8 +114,9 @@ filter_pattern = test_env['test_filter'] files_validation += Glob('validation/CPP/' + filter_pattern) if env['opencl']: - if env['experimental_dynamic_fusion'] and not env['embed_kernels']: + if env['experimental_dynamic_fusion']: test_env.Append(CPPDEFINES = ['ENABLE_EXPERIMENTAL_DYNAMIC_FUSION']) + files_validation += 
Glob('validation/CL/UNIT/dynamic_fusion/' + filter_pattern) filter_pattern = test_env['test_filter'] @@ -128,8 +129,7 @@ if env['opencl']: files_validation += Glob('validation/CL/*/' + filter_pattern) files_validation += Glob('validation/CL/' + filter_pattern) - if env['experimental_dynamic_fusion'] and not env['embed_kernels']: - files_validation += Glob('validation/CL/UNIT/dynamic_fusion/' + filter_pattern) + if env['external_tests_dir']: files_validation += Glob(env['external_tests_dir'] + '/tests/validation/CL/' + filter_pattern) files_validation += Glob('validation/gpu/unit/*.cpp') -- cgit v1.2.1