diff options
-rw-r--r-- | SConscript | 14 | ||||
-rw-r--r-- | src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h | 5 | ||||
-rw-r--r-- | src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClGemmNativeKernelComponent.cpp | 2 | ||||
-rw-r--r-- | src/gpu/cl/ClKernelLibrary.cpp | 12 | ||||
-rw-r--r-- | tests/SConscript | 6 |
5 files changed, 26 insertions, 13 deletions
diff --git a/SConscript b/SConscript index dd0f1488ac..3e5fa2f449 100644 --- a/SConscript +++ b/SConscript @@ -74,7 +74,7 @@ def build_obj_list(arch_info, sources, static=False): objs = tmp_env.StaticObject(sources) else: objs = tmp_env.SharedObject(sources) - + tmp_env.Default(objs) return objs @@ -96,7 +96,7 @@ def build_lib_objects(): # Build all the common files for the base architecture lib_static_objs += build_obj_list(filedefs["armv8.2-a"], lib_files, static=True) lib_shared_objs += build_obj_list(filedefs["armv8.2-a"], lib_files, static=False) - + # Build the SVE specific files lib_static_objs += build_obj_list(filedefs["armv8.2-a-sve"], lib_files_sve, static=True) lib_shared_objs += build_obj_list(filedefs["armv8.2-a-sve"], lib_files_sve, static=False) @@ -325,7 +325,7 @@ cpp_compiler = os.environ.get('CXX', default_cpp_compiler) # Generate embed files generate_embed = [ version_file ] if env['opencl'] and env['embed_kernels']: - + # Header files cl_helper_files = [ 'src/core/CL/cl_kernels/activation_float_helpers.h', 'src/core/CL/cl_kernels/activation_quant_helpers.h', @@ -496,12 +496,8 @@ lib_files = filelist['common'] # Experimental files # Dynamic fusion if env['experimental_dynamic_fusion']: - if env['embed_kernels']: - # COMPMID-5176 - print("Dynamic fusion with embed_kernels=1 not supported. 
Skipping.") - else: - lib_files += filelist['experimental']['dynamic_fusion'] - arm_compute_env.Append(CPPDEFINES = ['ENABLE_EXPERIMENTAL_DYNAMIC_FUSION']) + lib_files += filelist['experimental']['dynamic_fusion'] + arm_compute_env.Append(CPPDEFINES = ['ENABLE_EXPERIMENTAL_DYNAMIC_FUSION']) # Logging files diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h index 6e1291cdd5..4c720ea1aa 100644 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h +++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/Common.h @@ -27,6 +27,7 @@ #define ARM_COMPUTE_EXPERIMENTAL_DYNAMICFUSION_IMPL_COMMON_H #include "arm_compute/core/CL/CLCompileContext.h" +#include "arm_compute/core/CL/CLKernelLibrary.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/GPUTarget.h" #include "src/core/common/Macros.h" @@ -494,7 +495,11 @@ public: for(auto &header : headers_list) { +#if defined(EMBEDDED_KERNELS) + code += CLKernelLibrary::get().get_program(header).first; +#else // defined(EMBEDDED_KERNELS) code += "#include \"" + header + "\"\n"; +#endif // defined(EMBEDDED_KERNELS) } for(auto &macros : additional_macros) diff --git a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClGemmNativeKernelComponent.cpp b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClGemmNativeKernelComponent.cpp index 4bf0b76c3a..7d23128276 100644 --- a/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClGemmNativeKernelComponent.cpp +++ b/src/core/experimental/dynamic_fusion/ClKernelBuildingImpl/components/ClGemmNativeKernelComponent.cpp @@ -44,7 +44,7 @@ ComponentType ClGemmNativeKernelComponent::get_component_type() const std::set<std::string> ClGemmNativeKernelComponent::get_headers_list() const { - return std::set<std::string> { "./common/experimental/gemm_fused_post_ops/act_eltwise_op_act/fp_post_ops_act_eltwise_op_act.h", 
"gemm_helpers.h", "repeat.h" }; + return std::set<std::string> { "common/experimental/gemm_fused_post_ops/act_eltwise_op_act/fp_post_ops_act_eltwise_op_act.h", "gemm_helpers.h", "repeat.h" }; } Window ClGemmNativeKernelComponent::get_window() const diff --git a/src/gpu/cl/ClKernelLibrary.cpp b/src/gpu/cl/ClKernelLibrary.cpp index a5d37f49c4..5c17b43b8f 100644 --- a/src/gpu/cl/ClKernelLibrary.cpp +++ b/src/gpu/cl/ClKernelLibrary.cpp @@ -587,6 +587,10 @@ const std::map<std::string, std::string> ClKernelLibrary::_program_source_map = #include "./cl_kernels/common/gemm_utils.clembed" }, { + "common/experimental/gemm_fused_post_ops/act_eltwise_op_act/fp_post_ops_act_eltwise_op_act.h", +#include "./cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/fp_post_ops_act_eltwise_op_act.hembed" + }, + { "common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_native.cl", #include "./cl_kernels/common/experimental/gemm_fused_post_ops/act_eltwise_op_act/gemm_mm_native.clembed" }, @@ -615,6 +619,10 @@ const std::map<std::string, std::string> ClKernelLibrary::_program_source_map = #include "./cl_kernels/common/generate_proposals_quantized.clembed" }, { + "gemm_helpers.h", +#include "./cl_kernels/gemm_helpers.hembed" + }, + { "helpers.h", #include "./cl_kernels/helpers.hembed" }, @@ -623,6 +631,10 @@ const std::map<std::string, std::string> ClKernelLibrary::_program_source_map = #include "./cl_kernels/helpers_asymm.hembed" }, { + "repeat.h", +#include "./cl_kernels/repeat.hembed" + }, + { "common/instance_normalization.cl", #include "./cl_kernels/common/instance_normalization.clembed" }, diff --git a/tests/SConscript b/tests/SConscript index cfe3bc5e82..62fa4fce11 100644 --- a/tests/SConscript +++ b/tests/SConscript @@ -114,8 +114,9 @@ filter_pattern = test_env['test_filter'] files_validation += Glob('validation/CPP/' + filter_pattern) if env['opencl']: - if env['experimental_dynamic_fusion'] and not env['embed_kernels']: + if 
env['experimental_dynamic_fusion']: test_env.Append(CPPDEFINES = ['ENABLE_EXPERIMENTAL_DYNAMIC_FUSION']) + files_validation += Glob('validation/CL/UNIT/dynamic_fusion/' + filter_pattern) filter_pattern = test_env['test_filter'] @@ -128,8 +129,7 @@ if env['opencl']: files_validation += Glob('validation/CL/*/' + filter_pattern) files_validation += Glob('validation/CL/' + filter_pattern) - if env['experimental_dynamic_fusion'] and not env['embed_kernels']: - files_validation += Glob('validation/CL/UNIT/dynamic_fusion/' + filter_pattern) + if env['external_tests_dir']: files_validation += Glob(env['external_tests_dir'] + '/tests/validation/CL/' + filter_pattern) files_validation += Glob('validation/gpu/unit/*.cpp') |