author     Anton Lokhmotov <psyhtest@users.noreply.github.com>  2017-11-20 11:02:10 +0000
committer  Anthony Barbier <anthony.barbier@arm.com>  2018-11-02 16:35:24 +0000
commit     3e80c7fa601d5996e8ada3b2f6c69327f066ec17 (patch)
tree       e1d4f1c8c1dafe46005feb4e716ed80b6bbe9489 /src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp
parent     d7295b7079f6b9126596cea998146ca9c6e87706 (diff)
download   ComputeLibrary-3e80c7fa601d5996e8ada3b2f6c69327f066ec17.tar.gz
COMPMID-661: Optimize FC layer with 2 new Bifrost kernels and LWS tuning (#33)
Change-Id: Ie56ac88dff5ff339572cec562e8cd62dc7f0aa8b
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/109805
Tested-by: BSG Visual Compute Jenkins server to access repositories on http://mpd-gerrit.cambridge.arm.com <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
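The gist of the change, visible in the diff below, is that the configure step now derives its vectorization width from the detected GPU architecture before assembling the OpenCL build options. A minimal standalone sketch of that selection pattern follows; the enum values mirror the library's GPUTarget names, but make_build_opts and the bare std::to_string usage are illustrative assumptions, not the library's API.

// Sketch only: mirrors the per-architecture vector-size selection in the
// diff below. make_build_opts is a hypothetical helper, not library code.
#include <set>
#include <string>

enum class GPUTarget { MIDGARD, BIFROST };

std::set<std::string> make_build_opts(GPUTarget arch, const std::string &cl_type)
{
    // Bifrost parts process 8 elements per work item; Midgard keeps the
    // original 16-element processing step.
    const unsigned int vector_size = (arch == GPUTarget::BIFROST) ? 8U : 16U;

    std::set<std::string> opts;
    opts.insert("-DDATA_TYPE=" + cl_type);
    opts.insert("-DVECTOR_SIZE=" + std::to_string(vector_size));
    return opts;
}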
Diffstat (limited to 'src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp')
-rw-r--r--  src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp  23
1 file changed, 14 insertions(+), 9 deletions(-)
diff --git a/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp b/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp
index 263cfab2dc..015b4f70a4 100644
--- a/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp
+++ b/src/core/CL/kernels/CLGEMMMatrixAccumulateBiasesKernel.cpp
@@ -51,18 +51,23 @@ void CLGEMMMatrixAccumulateBiasesKernel::configure(ICLTensor *accum, const ICLTe
_biases = biases;
_accum = accum;
- std::set<std::string> build_opts;
- build_opts.insert(("-DDATA_TYPE=" + get_cl_type_from_data_type(accum->info()->data_type())));
- if(is_data_type_fixed_point(accum->info()->data_type()))
- {
- build_opts.emplace("-DFIXED_POINT_POSITION=" + support::cpp11::to_string(accum->info()->fixed_point_position()));
- }
+ // Get the target architecture
+ GPUTarget arch_target = get_arch_from_target(get_target());
+ // Select the vector size to use (8 for Bifrost; 16 for Midgard).
+ const unsigned int vector_size = (arch_target == GPUTarget::BIFROST) ? 8 : 16;
+
+ // Add build options
+ CLBuildOptions build_opts;
+ build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(accum->info()->data_type()));
+ build_opts.add_option("-DVECTOR_SIZE=" + support::cpp11::to_string(vector_size));
+ build_opts.add_option_if(is_data_type_fixed_point(accum->info()->data_type()),
+ "-DFIXED_POINT_POSITION=" + support::cpp11::to_string(accum->info()->fixed_point_position()));
// Create kernel
- _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("gemm_accumulate_biases", build_opts));
+ _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("gemm_accumulate_biases", build_opts.options()));
// Configure kernel window
- const unsigned int num_elems_processed_per_iteration = 16;
+ const unsigned int num_elems_processed_per_iteration = vector_size;
Window win = calculate_max_window(*_accum->info(), Steps(num_elems_processed_per_iteration));
@@ -92,7 +97,7 @@ void CLGEMMMatrixAccumulateBiasesKernel::run(const Window &window, cl::CommandQu
add_2D_tensor_argument(idx, _accum, accum_slice);
add_1D_tensor_argument(idx, _biases, biases_slice);
- enqueue(queue, *this, accum_slice);
+ enqueue(queue, *this, accum_slice, _lws_hint);
}
while(window.slide_window_slice_2D(accum_slice));
}
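The run() hunk above additionally forwards the kernel's stored local-work-size hint to enqueue(), so a tuned LWS reaches the OpenCL runtime instead of the default NullRange. At the plain OpenCL C++ API level that boils down to roughly the following; the sizes are placeholders and launch_2d is a hypothetical wrapper, not the library's enqueue() helper.

// Sketch: launching a 2D kernel with an explicit local work size. In the
// library, the global size comes from the configured window and the local
// size from the per-target / LWS-tuning hint.
#include <CL/cl.hpp>

void launch_2d(cl::CommandQueue &queue, cl::Kernel &kernel)
{
    const cl::NDRange gws(256, 16); // placeholder global size, padded to the processing step
    const cl::NDRange lws(8, 1);    // placeholder tuned local-work-size hint
    queue.enqueueNDRangeKernel(kernel, cl::NullRange, gws, lws);
}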