aboutsummaryrefslogtreecommitdiff
path: root/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp
diff options
context:
space:
mode:
author: Gunes Bayir <gunes.bayir@arm.com> 2024-01-17 16:07:03 +0000
committer: Viet-Hoa Do <viet-hoa.do@arm.com> 2024-02-01 16:00:34 +0000
commit: 2b9fa593a0a172bf36a02b5cdb840c6b9b361d7c (patch)
tree: a4e2d5ce46443a79a0778e4960462ce3edf106ec /src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp
parent: 7ab7fca87cca8775f82b0e9efec6a40975910c17 (diff)
download: ComputeLibrary-2b9fa593a0a172bf36a02b5cdb840c6b9b361d7c.tar.gz
Use the stable CKW API in the GPU dynamic fusion backend
- Refactor all kernels to work with the CKW stable API
- Add support for sub-tile in the op_load/op_store CKW operator
- Fix mismatch in resize
- Add comments in all kernels written with CKW to help developers understand the structure of the code
- Add texture image support in depthwise convolution written with CKW
- Add support for different block sizes in depthwise convolution
- Remove the use of the dynamic fusion helper functions.
- Add support for floor in the op_unary() of CKW

Resolves: COMPMID-6708, COMPMID-6743, COMPMID-6530
Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Signed-off-by: Gunes Bayir <gunes.bayir@arm.com>
Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Signed-off-by: Jakub Sujak <jakub.sujak@arm.com>
Change-Id: I8104ce4d04a3138a1aeb0b84940e1f1c89e76069
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10914
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp')
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp 28
1 file changed, 13 insertions, 15 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp
index ebb0374501..8936db6abe 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2023 Arm Limited.
+ * Copyright (c) 2023-2024 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -78,9 +78,8 @@ std::string ClTemplatePool2d::get_component_code(const ComponentGroup &comp_grou
std::string ClTemplatePool2d::get_MxN_kernel_code() const
{
- const auto pool_type = _attributes.pool_type();
- const bool fp_mixed_precision =
- (_src->data_type() == DataType::F16) && _settings.mixed_precision() && pool_type != PoolingType::MAX;
+ const auto pool_type = _attributes.pool_type();
+ const bool fp_mixed_precision = (_src->data_type() == DataType::F16) && pool_type != PoolingType::MAX;
// Define pool op macro.
std::string pool_op = (pool_type == PoolingType::AVG) ? R"_(#define POOL_OP(x,y) ((x) + (y)))_"
@@ -226,11 +225,10 @@ std::string ClTemplatePool2d::get_MxN_kernel_code() const
std::string ClTemplatePool2d::get_2x2_kernel_code() const
{
- const auto pool_type = _attributes.pool_type();
- const bool fp_mixed_precision =
- (_src->data_type() == DataType::F16) && _settings.mixed_precision() && pool_type != PoolingType::MAX;
- std::string pool_op = (pool_type == PoolingType::AVG) ? R"_(#define POOL_OP(x,y) ((x) + (y)))_"
- : R"_(#define POOL_OP(x,y) (fmax((x), (y))) )_";
+ const auto pool_type = _attributes.pool_type();
+ const bool fp_mixed_precision = (_src->data_type() == DataType::F16) && pool_type != PoolingType::MAX;
+ std::string pool_op = (pool_type == PoolingType::AVG) ? R"_(#define POOL_OP(x,y) ((x) + (y)))_"
+ : R"_(#define POOL_OP(x,y) (fmax((x), (y))) )_";
std::string code = R"_(
//------------------ START KERNEL {{meta_kernel_id}} ---------------------
@@ -385,12 +383,12 @@ TagLUT ClTemplatePool2d::get_tag_lut(const GpuKernelVariableTable &vtable, const
lut["meta_kernel_id"] = id();
// Retrieve relevant data
- const auto padding = _attributes.pad();
- const auto stride = _attributes.stride();
- const auto pool_size = _attributes.pool_size();
- const auto data_type = _src->data_type();
- const auto use_fp_mixed_precision = (_src->data_type() == DataType::F16) && _settings.mixed_precision() &&
- _attributes.pool_type() != PoolingType::MAX;
+ const auto padding = _attributes.pad();
+ const auto stride = _attributes.stride();
+ const auto pool_size = _attributes.pool_size();
+ const auto data_type = _src->data_type();
+ const auto use_fp_mixed_precision =
+ (_src->data_type() == DataType::F16) && _attributes.pool_type() != PoolingType::MAX;
const std::string max_initial_value =
_settings.use_inf_as_limit() ? "(-INFINITY)"
: float_to_string_with_full_precision(std::numeric_limits<float>::lowest());