aboutsummaryrefslogtreecommitdiff
path: root/src/dynamic_fusion/sketch/gpu/template_writer/cl
diff options
context:
space:
mode:
authorRamy Elgammal <ramy.elgammal@arm.com>2022-11-30 16:23:10 +0000
committerRamy Elgammal <ramy.elgammal@arm.com>2022-12-09 13:57:49 +0000
commitdf6a3b05842a98702437347ca269138ccd55f852 (patch)
treed38b3cc83acfa0aa492b953b6a3c06104e0d76fc /src/dynamic_fusion/sketch/gpu/template_writer/cl
parent86689cdd95f634fb374f3875f62a4cb3408e1699 (diff)
downloadComputeLibrary-df6a3b05842a98702437347ca269138ccd55f852.tar.gz
Use heuristics for setting dynamic fusion direct conv2d tile sizes
Resolves: COMPMID-5735 Change-Id: I9958413b69c5052cfa205dd0e9457cc4953aaf35 Signed-off-by: Ramy Elgammal <ramy.elgammal@arm.com> Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/c/VisualCompute/ComputeLibrary/+/474818 Tested-by: bsgcomp <bsgcomp@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Comments-Addressed: bsgcomp <bsgcomp@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8724 Tested-by: Arm Jenkins <bsgcomp@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu/template_writer/cl')
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp17
1 files changed, 8 insertions, 9 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
index 75e812af9f..6f7bf72df8 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
@@ -69,7 +69,7 @@ std::string ClTemplateDirectConv2d::get_component_code(const ComponentGroup &com
ARM_COMPUTE_UNUSED(comp_group);
const auto channel_idx = get_data_layout_dimension_index(_src->data_layout(), DataLayoutDimension::CHANNEL);
- const auto k0 = adjust_vec_size(is_data_type_quantized(_src->data_type()) ? 16u : 8u, _src->dimension(channel_idx));
+ const auto k0 = adjust_vec_size(_settings.direct_conv_descriptor().k0, _src->dimension(channel_idx));
const bool leftover_loop = (_src->dimension(channel_idx) % k0) != 0;
std::string code = R"_(
@@ -303,13 +303,11 @@ TagLUT ClTemplateDirectConv2d::get_tag_lut(const GpuKernelVariableTable &vtable,
CLBuildOptions ClTemplateDirectConv2d::get_build_options(const ComponentGroup &comp_group) const
{
const unsigned int channel_idx = get_data_layout_dimension_index(_src->data_layout(), DataLayoutDimension::CHANNEL);
- const DataType data_type = _src->data_type();
- /// NOTE: For now tile sizes (n0, m0, k0) are set by the execution window. This may change in the future
const auto root_window = comp_group.get_root_component()->template_writer()->get_window();
const unsigned int n0 = root_window.x().step();
const unsigned int m0 = root_window.y().step();
- const unsigned int k0 = adjust_vec_size(is_data_type_quantized(data_type) ? 16u : 8u, _src->dimension(channel_idx));
+ const unsigned int k0 = adjust_vec_size(_settings.direct_conv_descriptor().k0, _src->dimension(channel_idx));
const unsigned int partial_store_n0 = _dst->dimension(0) % n0;
CLBuildOptions build_opts{};
@@ -369,15 +367,16 @@ Window ClTemplateDirectConv2d::get_window() const
ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
const auto output_shape = _dst->tensor_shape();
+ const auto desc = _settings.direct_conv_descriptor();
- const unsigned int vec_size = std::min(static_cast<unsigned int>(output_shape[0]), 4u);
- const unsigned int num_rows = (_dst->tensor_shape()[0] > 16) ? ((_src->data_type() == DataType::F32) ? 2U : 4U) : 1U;
+ const unsigned int n0 = adjust_vec_size(desc.n0, output_shape[0]);
+ const unsigned int m0 = adjust_vec_size(desc.m0, output_shape[1] * output_shape[2]);
// Create and configure kernel window
- Window win = calculate_max_window(output_shape, Steps(vec_size, num_rows));
+ Window win = calculate_max_window(output_shape, Steps(n0, m0));
- const size_t dim_y_collapsed = ceil_to_multiple(output_shape[1] * output_shape[2], num_rows);
- win.set(Window::DimY, Window::Dimension(0, dim_y_collapsed, num_rows));
+ const size_t dim_y_collapsed = ceil_to_multiple(output_shape[1] * output_shape[2], m0);
+ win.set(Window::DimY, Window::Dimension(0, dim_y_collapsed, m0));
win.set(Window::DimZ, Window::Dimension(0, output_shape.total_size_upper(3), 1));
return win;