aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp
diff options
context:
space:
mode:
authorGiorgio Arena <giorgio.arena@arm.com>2018-07-06 17:06:36 +0100
committerAnthony Barbier <anthony.barbier@arm.com>2018-11-02 16:54:54 +0000
commit6200fa405b16b4145b926a96de197718ad31bf93 (patch)
tree4ecaa3a29d79371c6439acf5bb580bc7ba99af09 /src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp
parentea55f91e5dd4e5bc766fabbac6df6ce3ab984d0e (diff)
downloadComputeLibrary-6200fa405b16b4145b926a96de197718ad31bf93.tar.gz
COMPMID-1288 Optimizing CLGEMMLowp using 8 bit dot product instruction
Change-Id: I536174b9381660a94578d6aa1892a6289a820391 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/139109 Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com> Tested-by: Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp')
-rw-r--r--src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp8
1 files changed, 6 insertions, 2 deletions
diff --git a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp
index e040122663..8a4a1b5820 100644
--- a/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMLowpMatrixMultiplyKernel.cpp
@@ -190,6 +190,8 @@ void CLGEMMLowpMatrixMultiplyKernel::configure(const ICLTensor *input0, const IC
ARM_COMPUTE_ERROR_THROW_ON(win_config.first);
ICLKernel::configure(win_config.second);
+ const bool is_dot8_supported = dot8_supported(CLKernelLibrary::get().get_device());
+
// Create build options
CLBuildOptions build_opts;
std::string kernel_name(" ");
@@ -206,15 +208,17 @@ void CLGEMMLowpMatrixMultiplyKernel::configure(const ICLTensor *input0, const IC
build_opts.add_option("-DTRANSPOSE1XW_WIDTH_STEP=" + support::cpp11::to_string(4 * mult_transpose1xW_width));
build_opts.add_option("-DMULT_INTERLEAVE4X4_HEIGHT=" + support::cpp11::to_string(mult_interleave4x4_height));
- kernel_name = "gemmlowp_mm_interleaved_transposed_" + string_from_target(arch_target);
+ kernel_name = "gemmlowp_mm_interleaved_transposed_" + string_from_target(arch_target) + (is_dot8_supported ? "_dot8" : "");
}
else
{
build_opts.add_option("-DCOLS_A=" + support::cpp11::to_string(input0->info()->dimension(0)));
build_opts.add_option("-DNUM_ELEMS_PROCESSED_PER_THREAD_X=" + support::cpp11::to_string(num_elements_processed.x()));
build_opts.add_option("-DNUM_ELEMS_PROCESSED_PER_THREAD_Y=" + support::cpp11::to_string(num_elements_processed.y()));
- kernel_name = "gemmlowp_mm_" + string_from_target(arch_target);
+
+ kernel_name = "gemmlowp_mm_" + string_from_target(arch_target) + (is_dot8_supported ? "_dot8" : "");
}
+
// Create kernel
_kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options()));