aboutsummaryrefslogtreecommitdiff
path: root/src/dynamic_fusion/sketch/gpu/template_writer/cl
diff options
context:
space:
mode:
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu/template_writer/cl')
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp26
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h1
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp30
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp81
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h1
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp112
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h1
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp94
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h5
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp57
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h4
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp35
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp92
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h1
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp28
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h4
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp56
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp16
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h1
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp59
20 files changed, 306 insertions, 398 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp
index 3c7c843dd8..c165fb5f33 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp
@@ -26,6 +26,7 @@
#include "arm_compute/core/utils/ActivationFunctionUtils.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/helpers/WindowHelpers.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "support/StringSupport.h"
@@ -39,10 +40,7 @@ namespace dynamic_fusion
ClTemplateActivation::ClTemplateActivation(ComponentId id,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes)
- : IGpuTemplateComponentWriter{ id, tensors },
- _src{},
- _dst{},
- _attributes{ attributes }
+ : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}, _attributes{attributes}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC);
_dst = this->tensors().get_const_tensor(TensorType::ACL_DST);
@@ -62,7 +60,7 @@ std::string ClTemplateActivation::get_component_code(const ComponentGroup &comp_
code = R"_(
//------------------ START KERNEL {{meta_kernel_id}} ---------------------
)_";
- if(is_root)
+ if (is_root)
{
code += R"_(
// IN(src) {{src}}
@@ -104,17 +102,11 @@ LOOP_UNROLLING(int, i, 0, 1, M0,
void ClTemplateActivation::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
{
- vtable.declare_variable(
- comp_group,
- _src,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "src");
-
- vtable.declare_variable(
- comp_group,
- _dst,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "dst");
+ vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "src");
+
+ vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "dst");
}
TagLUT ClTemplateActivation::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
@@ -173,7 +165,7 @@ std::string ClTemplateActivation::get_config_id() const
std::set<std::string> ClTemplateActivation::get_headers_list() const
{
- return std::set<std::string>{ "helpers.h", "tile_helpers.h", "activation_float_helpers.h" };
+ return std::set<std::string>{"helpers.h", "tile_helpers.h", "activation_float_helpers.h"};
}
Window ClTemplateActivation::get_window() const
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h
index ec78cf6ce5..88ee370342 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h
@@ -26,6 +26,7 @@
#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/function_info/ActivationLayerInfo.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp
index 4956879ad3..0da3a73801 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp
@@ -25,6 +25,7 @@
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/helpers/WindowHelpers.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
@@ -35,7 +36,7 @@ namespace experimental
namespace dynamic_fusion
{
ClTemplateCast::ClTemplateCast(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes)
- : IGpuTemplateComponentWriter{ id, tensors }, _src{}, _dst{}, _attributes{ attributes }
+ : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}, _attributes{attributes}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
@@ -62,7 +63,7 @@ std::string ClTemplateCast::get_component_code(const ComponentGroup &comp_group)
//------------------ START KERNEL {{meta_kernel_id}} CAST ---------------------
)_";
- if(is_root)
+ if (is_root)
{
code += R"_(
// IN_0(src) {{src}}
@@ -82,14 +83,15 @@ TILE(uint, M0, 1, g_dst_indirect_y);
{
)_";
- if(kernel_name == "cast_down" && is_data_type_quantized(_src->data_type()))
+ if (kernel_name == "cast_down" && is_data_type_quantized(_src->data_type()))
{
code += R"_(
{{tmp}}[m0].v ^= (VEC_DATA_TYPE({{DATA_TYPE_IN}}, N0))0x80;
)_";
}
- if(kernel_name == "cast_down" && (is_data_type_float(_src->data_type()) || _attributes.convert_policy() == ConvertPolicy::SATURATE))
+ if (kernel_name == "cast_down" &&
+ (is_data_type_float(_src->data_type()) || _attributes.convert_policy() == ConvertPolicy::SATURATE))
{
code += R"_(
{{dst}}[m0].v = CONVERT_SAT({{tmp}}[m0].v, VEC_DATA_TYPE({{DATA_TYPE_OUT}}, N0));
@@ -106,7 +108,7 @@ TILE(uint, M0, 1, g_dst_indirect_y);
})
)_";
- if(is_root)
+ if (is_root)
{
code += R"_(
LOOP_UNROLLING(int, i, 0, 1, M0,
@@ -128,17 +130,11 @@ TILE(uint, M0, 1, g_dst_indirect_y);
void ClTemplateCast::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
{
- vtable.declare_variable(
- comp_group,
- _src,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "src");
-
- vtable.declare_variable(
- comp_group,
- _dst,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "dst");
+ vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "src");
+
+ vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "dst");
}
TagLUT ClTemplateCast::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
@@ -199,7 +195,7 @@ std::string ClTemplateCast::get_config_id() const
std::set<std::string> ClTemplateCast::get_headers_list() const
{
- return std::set<std::string>{ "helpers.h", "tile_helpers.h" };
+ return std::set<std::string>{"helpers.h", "tile_helpers.h"};
}
Window ClTemplateCast::get_window() const
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp
index ab7cc9f05a..8380620ab2 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp
@@ -36,17 +36,17 @@ ClTemplateDepthwiseConv2d::ClTemplateDepthwiseConv2d(ComponentId
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes,
const Settings &settings)
- : IGpuTemplateComponentWriter{ id, tensors },
+ : IGpuTemplateComponentWriter{id, tensors},
_src{},
_weight{},
_bias{},
_dst{},
- _attributes{ attributes },
- _settings{ settings }
+ _attributes{attributes},
+ _settings{settings}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_weight = this->tensors().get_const_tensor(TensorType::ACL_SRC_1);
- if(this->tensors().get_const_tensor(TensorType::ACL_SRC_2))
+ if (this->tensors().get_const_tensor(TensorType::ACL_SRC_2))
{
_bias = this->tensors().get_const_tensor(TensorType::ACL_SRC_2);
}
@@ -71,7 +71,7 @@ std::string ClTemplateDepthwiseConv2d::get_component_code(const ComponentGroup &
// IN_1(wei) {{weight}}
)_";
- if(_bias != nullptr && _bias->has_valid_id())
+ if (_bias != nullptr && _bias->has_valid_id())
{
code += R"_(
// IN_1(bia) {{bias}}
@@ -113,7 +113,7 @@ TILE(uint, M0, 1, g_dst_indirect_y);
})
)_";
- if(_weight->dimension(height_idx) < 5)
+ if (_weight->dimension(height_idx) < 5)
{
code += R"_(
LOOP_UNROLLING(int, yk, 0, 1, _IWEI_HEIGHT,
@@ -147,7 +147,7 @@ TILE(uint, M0, 1, g_dst_indirect_y);
{
)_";
- if(!_settings.is_fma_available())
+ if (!_settings.is_fma_available())
{
code += R"_(
{{dst}}[m0].v += a[xk + m0].v * b[xk].v;
@@ -166,14 +166,14 @@ TILE(uint, M0, 1, g_dst_indirect_y);
}
)_";
- if(_weight->dimension(height_idx) < 5)
+ if (_weight->dimension(height_idx) < 5)
{
code += R"_(
)
)_";
}
- if(_bias && _bias->has_valid_id())
+ if (_bias && _bias->has_valid_id())
{
code += R"_(
TILE({{BIA_DATA_TYPE}}, 1, N0, {{bias}});
@@ -198,44 +198,31 @@ TILE(uint, M0, 1, g_dst_indirect_y);
return code;
}
-void ClTemplateDepthwiseConv2d::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+void ClTemplateDepthwiseConv2d::declare_variables(GpuKernelVariableTable &vtable,
+ const ComponentGroup &comp_group) const
{
- const GpuKernelArgumentInfo::Type input_type = _settings.export_input_to_cl_image() ?
- GpuKernelArgumentInfo::Type::Tensor_4D_t_Image :
- GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer;
-
- vtable.declare_variable(
- comp_group,
- _src,
- GpuKernelArgumentInfo(input_type),
- "src");
-
- const GpuKernelArgumentInfo::Type weight_type = _settings.export_weights_to_cl_image() ?
- GpuKernelArgumentInfo::Type::Tensor_4D_t_Image :
- GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer;
-
- vtable.declare_variable(
- comp_group,
- _weight,
- GpuKernelArgumentInfo(weight_type),
- "weight");
-
- if(_bias != nullptr && _bias->has_valid_id()) // optional bias
+ const GpuKernelArgumentInfo::Type input_type = _settings.export_input_to_cl_image()
+ ? GpuKernelArgumentInfo::Type::Tensor_4D_t_Image
+ : GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer;
+
+ vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(input_type), "src");
+
+ const GpuKernelArgumentInfo::Type weight_type = _settings.export_weights_to_cl_image()
+ ? GpuKernelArgumentInfo::Type::Tensor_4D_t_Image
+ : GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer;
+
+ vtable.declare_variable(comp_group, _weight, GpuKernelArgumentInfo(weight_type), "weight");
+
+ if (_bias != nullptr && _bias->has_valid_id()) // optional bias
{
- vtable.declare_variable(
- comp_group,
- _bias,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Vector),
- "bias");
+ vtable.declare_variable(comp_group, _bias, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Vector), "bias");
}
- vtable.declare_variable(
- comp_group,
- _dst,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "dst");
+ vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "dst");
}
-TagLUT ClTemplateDepthwiseConv2d::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+TagLUT ClTemplateDepthwiseConv2d::get_tag_lut(const GpuKernelVariableTable &vtable,
+ const ComponentGroup &comp_group) const
{
TagLUT lut{};
@@ -243,7 +230,7 @@ TagLUT ClTemplateDepthwiseConv2d::get_tag_lut(const GpuKernelVariableTable &vtab
lut["src"] = vtable.get_variable(_src);
lut["weight"] = vtable.get_variable(_weight);
- if(_bias != nullptr && _bias->has_valid_id()) // optional bias
+ if (_bias != nullptr && _bias->has_valid_id()) // optional bias
{
lut["bias"] = vtable.get_variable(_bias);
lut["BIA_DATA_TYPE"] = get_cl_type_from_data_type(_bias->data_type());
@@ -259,7 +246,7 @@ TagLUT ClTemplateDepthwiseConv2d::get_tag_lut(const GpuKernelVariableTable &vtab
lut["SRC_DATA_TYPE"] = _src->data_type();
lut["WEI_DATA_TYPE"] = _weight->data_type();
- switch(vtable.get_variable(_src).kernel_argument_info.type)
+ switch (vtable.get_variable(_src).kernel_argument_info.type)
{
case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D:
case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:
@@ -271,7 +258,7 @@ TagLUT ClTemplateDepthwiseConv2d::get_tag_lut(const GpuKernelVariableTable &vtab
break;
}
- switch(vtable.get_variable(_weight).kernel_argument_info.type)
+ switch (vtable.get_variable(_weight).kernel_argument_info.type)
{
case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D:
case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:
@@ -318,7 +305,7 @@ CLBuildOptions ClTemplateDepthwiseConv2d::get_build_options(const ComponentGroup
CLBuildOptions build_opts{};
- if(_settings.fast_relaxed_math())
+ if (_settings.fast_relaxed_math())
{
build_opts.add_option("-cl-fast-relaxed-math");
}
@@ -361,7 +348,7 @@ std::string ClTemplateDepthwiseConv2d::get_config_id() const
std::set<std::string> ClTemplateDepthwiseConv2d::get_headers_list() const
{
- return std::set<std::string>{ "helpers.h", "tile_helpers.h" };
+ return std::set<std::string>{"helpers.h", "tile_helpers.h"};
}
Window ClTemplateDepthwiseConv2d::get_window() const
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h
index 84b689ef64..5d04c687c3 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h
@@ -25,6 +25,7 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDEPTHWISECONV2D
#include "arm_compute/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
index 3322487910..f6a7a58d1d 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
@@ -23,14 +23,13 @@
*/
#include "ClTemplateDirectConv2d.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
-
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/StringUtils.h"
-#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "support/StringSupport.h"
namespace arm_compute
@@ -43,17 +42,17 @@ ClTemplateDirectConv2d::ClTemplateDirectConv2d(ComponentId
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes,
const Settings &settings)
- : IGpuTemplateComponentWriter{ id, tensors },
+ : IGpuTemplateComponentWriter{id, tensors},
_src{},
_weight{},
_bias{},
_dst{},
- _attributes{ attributes },
- _settings{ settings }
+ _attributes{attributes},
+ _settings{settings}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_weight = this->tensors().get_const_tensor(TensorType::ACL_SRC_1);
- if(this->tensors().get_const_tensor(TensorType::ACL_SRC_2))
+ if (this->tensors().get_const_tensor(TensorType::ACL_SRC_2))
{
_bias = this->tensors().get_const_tensor(TensorType::ACL_SRC_2);
}
@@ -79,7 +78,7 @@ std::string ClTemplateDirectConv2d::get_component_code(const ComponentGroup &com
// IN_0(src) {{src}}
// IN_1(wei) {{weight}}
)_";
- if(_bias && _bias->has_valid_id())
+ if (_bias && _bias->has_valid_id())
{
code += R"_(
// IN_1(bia) {{bias}}
@@ -161,7 +160,7 @@ TILE(uint, M0, 1, g_dst_indirect_y);
}
)_";
- if(leftover_loop)
+ if (leftover_loop)
{
code += R"_(
for(; ck < _ISRC_CHANNELS; ++ck)
@@ -186,9 +185,9 @@ TILE(uint, M0, 1, g_dst_indirect_y);
T_MMUL({{SRC_DATA_TYPE}}, {{WEI_DATA_TYPE}}, {{ACC_DATA_TYPE}}, M0, N0, 1, NT, T, a, b, {{dst}});
}
)_";
-}
+ }
-code += R"_(
+ code += R"_(
#undef _I_WEI_WIDTH
#undef _I_WEI_HEIGHT
#undef _ISRC_WIDTH
@@ -202,7 +201,7 @@ code += R"_(
}
)_";
- if(_bias && _bias->has_valid_id())
+ if (_bias && _bias->has_valid_id())
{
code += R"_(
TILE({{BIA_DATA_TYPE}}, 1, N0, bias0);
@@ -211,9 +210,9 @@ code += R"_(
T_ELTWISE_BROADCAST_ADD_X({{ACC_DATA_TYPE}}, M0, N0, {{dst}}, bias0, {{dst}});
)_";
-}
+ }
-code += R"_(
+ code += R"_(
LOOP_UNROLLING(int, i, 0, 1, M0,
{
g_dst_indirect_y[i].v = (uint)min(g_ind_1 + i, (int)({{DST_WIDTH}} * {{DST_HEIGHT}}) - 1);
@@ -227,32 +226,19 @@ code += R"_(
void ClTemplateDirectConv2d::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
{
- vtable.declare_variable(
- comp_group,
- _src,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "src");
-
- const GpuKernelArgumentInfo::Type weight_type = _settings.export_to_cl_image() ? GpuKernelArgumentInfo::Type::Tensor_4D_t_Image : GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer;
- vtable.declare_variable(
- comp_group,
- _weight,
- GpuKernelArgumentInfo(weight_type),
- "weight");
-
- if(_bias && _bias->has_valid_id()) // optional bias
+ vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "src");
+
+ const GpuKernelArgumentInfo::Type weight_type = _settings.export_to_cl_image()
+ ? GpuKernelArgumentInfo::Type::Tensor_4D_t_Image
+ : GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer;
+ vtable.declare_variable(comp_group, _weight, GpuKernelArgumentInfo(weight_type), "weight");
+
+ if (_bias && _bias->has_valid_id()) // optional bias
{
- vtable.declare_variable(
- comp_group,
- _bias,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Vector),
- "bias");
+ vtable.declare_variable(comp_group, _bias, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Vector), "bias");
}
- vtable.declare_variable(
- comp_group,
- _dst,
- GpuKernelArgumentInfo(common_tensor_type),
- "dst");
+ vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(common_tensor_type), "dst");
}
TagLUT ClTemplateDirectConv2d::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
@@ -262,7 +248,7 @@ TagLUT ClTemplateDirectConv2d::get_tag_lut(const GpuKernelVariableTable &vtable,
lut["src"] = vtable.get_variable(_src);
lut["weight"] = vtable.get_variable(_weight);
- if(_bias && _bias->has_valid_id()) // optional bias
+ if (_bias && _bias->has_valid_id()) // optional bias
{
lut["bias"] = vtable.get_variable(_bias);
lut["BIA_DATA_TYPE"] = get_cl_type_from_data_type(_bias->data_type());
@@ -279,34 +265,34 @@ TagLUT ClTemplateDirectConv2d::get_tag_lut(const GpuKernelVariableTable &vtable,
lut["WEI_DATA_TYPE"] = _weight->data_type();
lut["SRC_TENSOR_TYPE"] = "BUFFER";
- switch(vtable.get_variable(_weight).kernel_argument_info.type)
+ switch (vtable.get_variable(_weight).kernel_argument_info.type)
{
case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D:
case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:
case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image:
- {
- lut["WEI_TENSOR_TYPE"] = "IMAGE";
- break;
- }
+ {
+ lut["WEI_TENSOR_TYPE"] = "IMAGE";
+ break;
+ }
default:
- {
- lut["WEI_TENSOR_TYPE"] = "BUFFER";
- break;
- }
+ {
+ lut["WEI_TENSOR_TYPE"] = "BUFFER";
+ break;
+ }
}
- const auto width_idx = 1;
- const auto height_idx = 2;
+ const auto width_idx = 1;
+ const auto height_idx = 2;
const auto channel_idx = 0;
- lut["SRC_WIDTH"] = _src->dimension(width_idx);
- lut["SRC_HEIGHT"] = _src->dimension(height_idx);
+ lut["SRC_WIDTH"] = _src->dimension(width_idx);
+ lut["SRC_HEIGHT"] = _src->dimension(height_idx);
lut["SRC_CHANNELS"] = _src->dimension(channel_idx);
- lut["WEI_WIDTH"] = _weight->dimension(width_idx);
- lut["WEI_HEIGHT"] = _weight->dimension(height_idx);
+ lut["WEI_WIDTH"] = _weight->dimension(width_idx);
+ lut["WEI_HEIGHT"] = _weight->dimension(height_idx);
- lut["DST_WIDTH"] = _dst->dimension(width_idx);
- lut["DST_HEIGHT"] = _dst->dimension(height_idx);
+ lut["DST_WIDTH"] = _dst->dimension(width_idx);
+ lut["DST_HEIGHT"] = _dst->dimension(height_idx);
lut["DST_CHANNELS"] = _dst->dimension(channel_idx);
lut["STRIDE_X"] = _attributes.stride().x();
@@ -324,14 +310,14 @@ CLBuildOptions ClTemplateDirectConv2d::get_build_options(const ComponentGroup &c
{
const unsigned int channel_idx = get_data_layout_dimension_index(_src->data_layout(), DataLayoutDimension::CHANNEL);
- const auto root_window = comp_group.get_root_component()->template_writer()->get_window();
- const unsigned int n0 = root_window.x().step();
- const unsigned int m0 = root_window.y().step();
- const unsigned int k0 = adjust_vec_size(_settings.direct_conv_descriptor().k0, _src->dimension(channel_idx));
+ const auto root_window = comp_group.get_root_component()->template_writer()->get_window();
+ const unsigned int n0 = root_window.x().step();
+ const unsigned int m0 = root_window.y().step();
+ const unsigned int k0 = adjust_vec_size(_settings.direct_conv_descriptor().k0, _src->dimension(channel_idx));
const unsigned int partial_store_n0 = _dst->dimension(0) % n0;
CLBuildOptions build_opts{};
- if(_settings.fast_relaxed_math())
+ if (_settings.fast_relaxed_math())
{
build_opts.add_option("-cl-fast-relaxed-math");
}
@@ -379,7 +365,7 @@ std::string ClTemplateDirectConv2d::get_config_id() const
std::set<std::string> ClTemplateDirectConv2d::get_headers_list() const
{
- return std::set<std::string>{ "helpers.h", "tile_helpers.h" };
+ return std::set<std::string>{"helpers.h", "tile_helpers.h"};
}
Window ClTemplateDirectConv2d::get_window() const
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h
index 8988d3ca1c..03c8cd2f15 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h
@@ -26,6 +26,7 @@
#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp
index c0481ae190..78bff3c3f3 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp
@@ -23,14 +23,13 @@
*/
#include "ClTemplateElementwiseBinary.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h"
-
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/StringUtils.h"
-#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "support/StringSupport.h"
namespace arm_compute
@@ -44,11 +43,7 @@ constexpr unsigned int vector_size_byte_opencl = 16;
ClTemplateElementwiseBinary::ClTemplateElementwiseBinary(ComponentId id,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes)
- : IGpuTemplateComponentWriter{ id, tensors },
- _lhs{},
- _rhs{},
- _dst{},
- _attributes{ attributes }
+ : IGpuTemplateComponentWriter{id, tensors}, _lhs{}, _rhs{}, _dst{}, _attributes{attributes}
{
_lhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_rhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_1);
@@ -69,67 +64,67 @@ std::string ClTemplateElementwiseBinary::get_component_code(const ComponentGroup
const bool is_rhs_input = comp_group.is_input_tensor(_rhs);
code =
-R"_(
+ R"_(
//------------------ START KERNEL {{meta_kernel_id}} {{ELTWISE_OP}} ---------------------
)_";
- if(is_root)
+ if (is_root)
{
code +=
-R"_(
+ R"_(
TILE(uint, M0, 1, g_dst_indirect_y);
)_";
}
- if(is_lhs_input)
+ if (is_lhs_input)
{
code +=
-R"_(
+ R"_(
TILE({{DATA_TYPE}}, {{lhs_m0}}, N0, {{lhs}});
)_";
}
- if(is_rhs_input)
+ if (is_rhs_input)
{
code +=
-R"_(
+ R"_(
TILE({{DATA_TYPE}}, {{rhs_m0}}, N0, {{rhs}});
)_";
}
code +=
-R"_(
+ R"_(
{
)_";
- if(is_lhs_input)
+ if (is_lhs_input)
{
code +=
-R"_(
+ R"_(
{{lhs}}_offset_first_element_in_bytes += g_ind_2 * {{lhs}}_stride_w;
T_LOAD({{DATA_TYPE}}, {{lhs_m0}}, {{lhs_n0}}, BUFFER, {{lhs}}, {{lhs_start_ind_0}}, {{lhs_start_ind_1}}, 1, {{lhs}}_stride_y, {{lhs}});
)_";
}
- if(is_rhs_input)
+ if (is_rhs_input)
{
code +=
-R"_(
+ R"_(
{{rhs}}_offset_first_element_in_bytes += g_ind_2 * {{rhs}}_stride_w;
T_LOAD({{DATA_TYPE}}, {{rhs_m0}}, {{rhs_n0}}, BUFFER, {{rhs}}, {{rhs_start_ind_0}}, {{rhs_start_ind_1}}, 1, {{rhs}}_stride_y, {{rhs}});
)_";
}
code +=
-R"_(
+ R"_(
T_ELTWISE_{{BROADCAST_OP}}{{ELTWISE_OP}}({{DATA_TYPE}}, M0, N0, {{lhs}}, {{rhs}}, {{dst}});
)_";
- if(is_root)
+ if (is_root)
{
// Calculate the destination indirect Y
code +=
-R"_(
+ R"_(
LOOP_UNROLLING(int, i, 0, 1, M0,
{
g_dst_indirect_y[i].v = (uint)min(g_ind_1 + i, (int)({{arg_dst}}_w * {{arg_dst}}_h) - 1);
@@ -139,7 +134,7 @@ R"_(
}
code +=
-R"_(
+ R"_(
}
//------------------ END KERNEL {{meta_kernel_id}} {{ELTWISE_OP}} ---------------------
)_";
@@ -147,28 +142,18 @@ R"_(
return code;
}
-void ClTemplateElementwiseBinary::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+void ClTemplateElementwiseBinary::declare_variables(GpuKernelVariableTable &vtable,
+ const ComponentGroup &comp_group) const
{
- vtable.declare_variable(
- comp_group,
- _lhs,
- GpuKernelArgumentInfo(common_tensor_type),
- "lhs");
-
- vtable.declare_variable(
- comp_group,
- _rhs,
- GpuKernelArgumentInfo(common_tensor_type),
- "rhs");
-
- vtable.declare_variable(
- comp_group,
- _dst,
- GpuKernelArgumentInfo(common_tensor_type),
- "dst");
+ vtable.declare_variable(comp_group, _lhs, GpuKernelArgumentInfo(common_tensor_type), "lhs");
+
+ vtable.declare_variable(comp_group, _rhs, GpuKernelArgumentInfo(common_tensor_type), "rhs");
+
+ vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(common_tensor_type), "dst");
}
-TagLUT ClTemplateElementwiseBinary::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+TagLUT ClTemplateElementwiseBinary::get_tag_lut(const GpuKernelVariableTable &vtable,
+ const ComponentGroup &comp_group) const
{
TagLUT lut{};
@@ -182,7 +167,7 @@ TagLUT ClTemplateElementwiseBinary::get_tag_lut(const GpuKernelVariableTable &vt
lut["dst"] = vtable.get_variable(_dst);
lut["arg_dst"] = vtable.get_variable(comp_group.get_any_dst_tensor());
- switch(_attributes.operation())
+ switch (_attributes.operation())
{
case Attributes::ElementwiseOp::Add:
lut["ELTWISE_OP"] = "ADD";
@@ -197,10 +182,10 @@ TagLUT ClTemplateElementwiseBinary::get_tag_lut(const GpuKernelVariableTable &vt
ARM_COMPUTE_ERROR("Arithmetic Operation not supported");
}
- ARM_COMPUTE_ERROR_ON(
- comp_group.is_intermediate_tensor(_lhs) && detail::have_different_dimensions(_lhs->tensor_shape(), _dst->tensor_shape(), 0));
- ARM_COMPUTE_ERROR_ON(
- comp_group.is_intermediate_tensor(_rhs) && detail::have_different_dimensions(_rhs->tensor_shape(), _dst->tensor_shape(), 0));
+ ARM_COMPUTE_ERROR_ON(comp_group.is_intermediate_tensor(_lhs) &&
+ detail::have_different_dimensions(_lhs->tensor_shape(), _dst->tensor_shape(), 0));
+ ARM_COMPUTE_ERROR_ON(comp_group.is_intermediate_tensor(_rhs) &&
+ detail::have_different_dimensions(_rhs->tensor_shape(), _dst->tensor_shape(), 0));
// Set broadcast parameters
// PRE: All tensors are broadcast-compatible
@@ -228,9 +213,7 @@ TagLUT ClTemplateElementwiseBinary::get_tag_lut(const GpuKernelVariableTable &vt
lut["rhs_m0"] = (rhs_broadcast_yz) ? "1" : "M0";
lut["rhs_start_ind_1"] = (rhs_broadcast_yz) ? "0" : "g_ind_1";
- lut["BROADCAST_OP"] = (lhs_broadcast_yz) ? "BROADCAST_LHS_X_" :
- (rhs_broadcast_yz) ? "BROADCAST_RHS_X_" :
- "";
+ lut["BROADCAST_OP"] = (lhs_broadcast_yz) ? "BROADCAST_LHS_X_" : (rhs_broadcast_yz) ? "BROADCAST_RHS_X_" : "";
return lut;
}
@@ -268,7 +251,7 @@ std::string ClTemplateElementwiseBinary::get_config_id() const
std::set<std::string> ClTemplateElementwiseBinary::get_headers_list() const
{
- return std::set<std::string>{ "helpers.h", "tile_helpers.h" };
+ return std::set<std::string>{"helpers.h", "tile_helpers.h"};
}
Window ClTemplateElementwiseBinary::get_window() const
@@ -279,8 +262,9 @@ Window ClTemplateElementwiseBinary::get_window() const
// Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) and upper dimensions unchanged
// This is in line with the collapsing convention used by operators like Conv2d
output_shape.collapse(2U, 1U);
- const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
- Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
+ const unsigned int num_elems_processed_per_iteration =
+ adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
+ Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
return win;
}
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h
index 8cca954efe..991c0eca44 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h
@@ -25,6 +25,7 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEELEMENTWISEBINARY
#include "arm_compute/core/experimental/Types.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
@@ -48,9 +49,7 @@ public:
* @param[in] tensors Tensor arguments to the components
* @param[in] attributes Component attributes
*/
- ClTemplateElementwiseBinary(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes);
+ ClTemplateElementwiseBinary(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
/** Prevent instances of this class from being copy constructed */
ClTemplateElementwiseBinary(const ClTemplateElementwiseBinary &elementwise) = delete;
/** Prevent instances of this class from being copied */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp
index a8d8d32b12..522c33a022 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp
@@ -26,6 +26,7 @@
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/helpers/WindowHelpers.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "support/StringSupport.h"
@@ -38,16 +39,12 @@ namespace dynamic_fusion
{
namespace
{
- constexpr unsigned int serial_vector_size = 8;
+constexpr unsigned int serial_vector_size = 8;
} // namespace
ClTemplateLogits1DMaxShiftExpSum::ClTemplateLogits1DMaxShiftExpSum(ComponentId id,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes)
- : IGpuTemplateComponentWriter{ id, tensors },
- _src{},
- _sum{},
- _dst{},
- _attributes{ attributes }
+ : IGpuTemplateComponentWriter{id, tensors}, _src{}, _sum{}, _dst{}, _attributes{attributes}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_sum = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
@@ -79,7 +76,7 @@ std::string ClTemplateLogits1DMaxShiftExpSum::get_component_code(const Component
const bool beta_defined = (_attributes.beta() != 1.f);
- if(beta_defined)
+ if (beta_defined)
{
code += R"_(
VEC_TYPE beta = (VEC_TYPE){{BETA}};
@@ -91,7 +88,7 @@ std::string ClTemplateLogits1DMaxShiftExpSum::get_component_code(const Component
const unsigned int vector_size = adjust_vec_size(_serial_vector_size, reduction_dim_size);
const bool non_multiple_of_n0 = ((reduction_dim_size % vector_size) != 0);
- if(non_multiple_of_n0)
+ if (non_multiple_of_n0)
{
code += R"_(
VEC_TYPE data = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)src_addr);
@@ -111,19 +108,19 @@ std::string ClTemplateLogits1DMaxShiftExpSum::get_component_code(const Component
VEC_TYPE sum1D = 0;
)_";
- if(non_multiple_of_n0)
+ if (non_multiple_of_n0)
{
code += R"_(
data -= max_val;
)_";
- if(beta_defined)
+ if (beta_defined)
{
code += R"_(
data *= beta;
)_";
}
- if(_attributes.is_log_softmax())
+ if (_attributes.is_log_softmax())
{
code += R"_(
VSTORE_PARTIAL(N0, PARTIAL_N0)
@@ -153,14 +150,14 @@ std::string ClTemplateLogits1DMaxShiftExpSum::get_component_code(const Component
data -= max_val;
)_";
- if(beta_defined)
+ if (beta_defined)
{
code += R"_(
data *= beta;
)_";
}
- if(_attributes.is_log_softmax())
+ if (_attributes.is_log_softmax())
{
code += R"_(
VSTORE(N0)
@@ -191,28 +188,18 @@ std::string ClTemplateLogits1DMaxShiftExpSum::get_component_code(const Component
return code;
}
-void ClTemplateLogits1DMaxShiftExpSum::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+void ClTemplateLogits1DMaxShiftExpSum::declare_variables(GpuKernelVariableTable &vtable,
+ const ComponentGroup &comp_group) const
{
- vtable.declare_variable(
- comp_group,
- _src,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D),
- "src");
-
- vtable.declare_variable(
- comp_group,
- _sum,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D),
- "sum");
-
- vtable.declare_variable(
- comp_group,
- _dst,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D),
- "dst");
+ vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "src");
+
+ vtable.declare_variable(comp_group, _sum, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "sum");
+
+ vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "dst");
}
-TagLUT ClTemplateLogits1DMaxShiftExpSum::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+TagLUT ClTemplateLogits1DMaxShiftExpSum::get_tag_lut(const GpuKernelVariableTable &vtable,
+ const ComponentGroup &comp_group) const
{
ARM_COMPUTE_UNUSED(comp_group);
@@ -241,8 +228,8 @@ CLBuildOptions ClTemplateLogits1DMaxShiftExpSum::get_build_options(const Compone
ARM_COMPUTE_UNUSED(comp_group);
CLBuildOptions build_opts{};
- const unsigned int reduction_dim_size = _src->dimension(0);
- const unsigned int vector_size = adjust_vec_size(serial_vector_size, reduction_dim_size);
+ const unsigned int reduction_dim_size = _src->dimension(0);
+ const unsigned int vector_size = adjust_vec_size(serial_vector_size, reduction_dim_size);
build_opts.add_option("-DN0=" + support::cpp11::to_string(vector_size));
build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string((reduction_dim_size % vector_size)));
@@ -264,7 +251,7 @@ std::string ClTemplateLogits1DMaxShiftExpSum::get_config_id() const
std::set<std::string> ClTemplateLogits1DMaxShiftExpSum::get_headers_list() const
{
- return std::set<std::string>{ "helpers.h", "tile_helpers.h" };
+ return std::set<std::string>{"helpers.h", "tile_helpers.h"};
}
Window ClTemplateLogits1DMaxShiftExpSum::get_window() const
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h
index 5d232c0cf2..ac9ddaa9d4 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h
@@ -46,7 +46,9 @@ public:
* @param[in] tensors Tensor arguments to the components
* @param[in] attributes Component attributes
*/
- ClTemplateLogits1DMaxShiftExpSum(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
+ ClTemplateLogits1DMaxShiftExpSum(ComponentId id,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes);
/** Prevent instances of this class from being copy constructed */
ClTemplateLogits1DMaxShiftExpSum(const ClTemplateLogits1DMaxShiftExpSum &) = delete;
/** Prevent instances of this class from being copied */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp
index 056e570a25..7d7c3e6673 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp
@@ -25,6 +25,7 @@
#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
+
#include "src/core/helpers/WindowHelpers.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "support/StringSupport.h"
@@ -38,11 +39,7 @@ namespace dynamic_fusion
ClTemplateLogits1DNorm::ClTemplateLogits1DNorm(ComponentId id,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes)
- : IGpuTemplateComponentWriter{ id, tensors },
- _src{},
- _sum{},
- _dst{},
- _attributes{ attributes }
+ : IGpuTemplateComponentWriter{id, tensors}, _src{}, _sum{}, _dst{}, _attributes{attributes}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_sum = this->tensors().get_const_tensor(TensorType::ACL_SRC_1);
@@ -76,7 +73,7 @@ std::string ClTemplateLogits1DNorm::get_component_code(const ComponentGroup &com
data0 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)src_addr);
)_";
- if(_attributes.is_log_softmax())
+ if (_attributes.is_log_softmax())
{
code += R"_(
sum_val = log(sum_val);
@@ -101,23 +98,11 @@ std::string ClTemplateLogits1DNorm::get_component_code(const ComponentGroup &com
void ClTemplateLogits1DNorm::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
{
- vtable.declare_variable(
- comp_group,
- _src,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D),
- "src");
-
- vtable.declare_variable(
- comp_group,
- _sum,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D),
- "sum");
-
- vtable.declare_variable(
- comp_group,
- _dst,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D),
- "dst");
+ vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "src");
+
+ vtable.declare_variable(comp_group, _sum, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "sum");
+
+ vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "dst");
}
TagLUT ClTemplateLogits1DNorm::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
@@ -168,14 +153,14 @@ std::string ClTemplateLogits1DNorm::get_config_id() const
std::set<std::string> ClTemplateLogits1DNorm::get_headers_list() const
{
- return std::set<std::string>{ "helpers.h", "tile_helpers.h" };
+ return std::set<std::string>{"helpers.h", "tile_helpers.h"};
}
Window ClTemplateLogits1DNorm::get_window() const
{
ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
constexpr unsigned int serial_vector_size = 16;
- const unsigned int vector_size = adjust_vec_size(serial_vector_size, _src->dimension(0));
+ const unsigned int vector_size = adjust_vec_size(serial_vector_size, _src->dimension(0));
Window win = calculate_max_window(*_src, Steps(vector_size));
return win.collapse(win, Window::DimZ);
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp
index 34840c2100..ebb0374501 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp
@@ -23,14 +23,13 @@
*/
#include "ClTemplatePool2d.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
-
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/StringUtils.h"
-#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "support/StringSupport.h"
namespace arm_compute
@@ -50,11 +49,7 @@ ClTemplatePool2d::ClTemplatePool2d(ComponentId id,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes,
const Settings &settings)
- : IGpuTemplateComponentWriter{ id, tensors },
- _src{},
- _dst{},
- _attributes{ attributes },
- _settings{ settings }
+ : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}, _attributes{attributes}, _settings{settings}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
@@ -71,7 +66,7 @@ std::string ClTemplatePool2d::get_component_code(const ComponentGroup &comp_grou
ARM_COMPUTE_UNUSED(comp_group);
// Condition to use 2x2 optimized kernel
- if(_attributes.pool_size() == Size2D(2, 2))
+ if (_attributes.pool_size() == Size2D(2, 2))
{
return get_2x2_kernel_code();
}
@@ -83,11 +78,13 @@ std::string ClTemplatePool2d::get_component_code(const ComponentGroup &comp_grou
std::string ClTemplatePool2d::get_MxN_kernel_code() const
{
- const auto pool_type = _attributes.pool_type();
- const bool fp_mixed_precision = (_src->data_type() == DataType::F16) && _settings.mixed_precision() && pool_type != PoolingType::MAX;
+ const auto pool_type = _attributes.pool_type();
+ const bool fp_mixed_precision =
+ (_src->data_type() == DataType::F16) && _settings.mixed_precision() && pool_type != PoolingType::MAX;
// Define pool op macro.
- std::string pool_op = (pool_type == PoolingType::AVG) ? R"_(#define POOL_OP(x,y) ((x) + (y)))_" : R"_(#define POOL_OP(x,y) (fmax((x), (y))) )_";
+ std::string pool_op = (pool_type == PoolingType::AVG) ? R"_(#define POOL_OP(x,y) ((x) + (y)))_"
+ : R"_(#define POOL_OP(x,y) (fmax((x), (y))) )_";
// Kernel start
// Note: If C is not multiple of N0, we shift back of PARTIAL_N0 elements to compute the leftover elements for get_global_id(0) == 0
@@ -129,7 +126,7 @@ std::string ClTemplatePool2d::get_MxN_kernel_code() const
)_";
// Determine filter size depending on if padding is excluded or not
- if(_attributes.exclude_padding())
+ if (_attributes.exclude_padding())
{
code += R"_(
const int filter_size = (pool_y_e - pool_y_s) * (pool_x_e - pool_x_s);
@@ -144,7 +141,8 @@ std::string ClTemplatePool2d::get_MxN_kernel_code() const
// Loop through pool size
// if global pooling
- if(_attributes.pool_size().x() == _src->dimension(width_idx) && _attributes.pool_size().y() == _src->dimension(height_idx))
+ if (_attributes.pool_size().x() == _src->dimension(width_idx) &&
+ _attributes.pool_size().y() == _src->dimension(height_idx))
{
// Begin loop
code += R"_(
@@ -173,7 +171,7 @@ std::string ClTemplatePool2d::get_MxN_kernel_code() const
// if condition inside loop - use 32bit acc if mixed_precision.
// End loop through pooling section.
- if(fp_mixed_precision)
+ if (fp_mixed_precision)
{
// In case of FP_MIXED_PRECISION, ACC_DATA_TYPE is != DATA_TYPE
code += R"_(
@@ -194,7 +192,7 @@ std::string ClTemplatePool2d::get_MxN_kernel_code() const
}
// For Pool AVG ONLY, divide pool output by filter size
- if(pool_type == PoolingType::AVG)
+ if (pool_type == PoolingType::AVG)
{
code += R"_(
res0 /= (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0))filter_size;
@@ -202,7 +200,7 @@ std::string ClTemplatePool2d::get_MxN_kernel_code() const
}
// If mixed precision convert datatype before storing. Then end kernel.
- if(fp_mixed_precision)
+ if (fp_mixed_precision)
{
code += R"_(
VEC_DATA_TYPE({{DATA_TYPE}}, N0)
@@ -228,9 +226,11 @@ std::string ClTemplatePool2d::get_MxN_kernel_code() const
std::string ClTemplatePool2d::get_2x2_kernel_code() const
{
- const auto pool_type = _attributes.pool_type();
- const bool fp_mixed_precision = (_src->data_type() == DataType::F16) && _settings.mixed_precision() && pool_type != PoolingType::MAX;
- std::string pool_op = (pool_type == PoolingType::AVG) ? R"_(#define POOL_OP(x,y) ((x) + (y)))_" : R"_(#define POOL_OP(x,y) (fmax((x), (y))) )_";
+ const auto pool_type = _attributes.pool_type();
+ const bool fp_mixed_precision =
+ (_src->data_type() == DataType::F16) && _settings.mixed_precision() && pool_type != PoolingType::MAX;
+ std::string pool_op = (pool_type == PoolingType::AVG) ? R"_(#define POOL_OP(x,y) ((x) + (y)))_"
+ : R"_(#define POOL_OP(x,y) (fmax((x), (y))) )_";
std::string code = R"_(
//------------------ START KERNEL {{meta_kernel_id}} ---------------------
@@ -274,7 +274,7 @@ std::string ClTemplatePool2d::get_2x2_kernel_code() const
REPEAT_VAR_INIT_TO_CONST(4, VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0), data, 0);
)_";
- if(fp_mixed_precision)
+ if (fp_mixed_precision)
{
// In case of FP_MIXED_PRECISION, ACC_DATA_TYPE is != DATA_TYPE
code += R"_(
@@ -294,7 +294,7 @@ std::string ClTemplatePool2d::get_2x2_kernel_code() const
)_";
}
- if(pool_type != PoolingType::MAX)
+ if (pool_type != PoolingType::MAX)
{
// Make invalid the values loaded if the x or y coordinate was clamped (out-of-bound)
code += R"_(
@@ -321,10 +321,10 @@ std::string ClTemplatePool2d::get_2x2_kernel_code() const
res0 = POOL_OP(res0, data3);
)_";
- if(pool_type == PoolingType::AVG)
+ if (pool_type == PoolingType::AVG)
{
// If avg pooling divide result accordingly.
- if(_attributes.exclude_padding())
+ if (_attributes.exclude_padding())
{
code += R"_(
res0 /= (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0))filter_size;
@@ -339,7 +339,7 @@ std::string ClTemplatePool2d::get_2x2_kernel_code() const
}
// Store result
- if(fp_mixed_precision)
+ if (fp_mixed_precision)
{
code += R"_(
VEC_DATA_TYPE({{DATA_TYPE}}, N0)
@@ -365,17 +365,11 @@ std::string ClTemplatePool2d::get_2x2_kernel_code() const
void ClTemplatePool2d::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
{
- vtable.declare_variable(
- comp_group,
- _src,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "src");
-
- vtable.declare_variable(
- comp_group,
- _dst,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "dst");
+ vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "src");
+
+ vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "dst");
}
TagLUT ClTemplatePool2d::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
@@ -391,12 +385,15 @@ TagLUT ClTemplatePool2d::get_tag_lut(const GpuKernelVariableTable &vtable, const
lut["meta_kernel_id"] = id();
// Retrieve relevant data
- const auto padding = _attributes.pad();
- const auto stride = _attributes.stride();
- const auto pool_size = _attributes.pool_size();
- const auto data_type = _src->data_type();
- const auto use_fp_mixed_precision = (_src->data_type() == DataType::F16) && _settings.mixed_precision() && _attributes.pool_type() != PoolingType::MAX;
- const std::string max_initial_value = _settings.use_inf_as_limit() ? "(-INFINITY)" : float_to_string_with_full_precision(std::numeric_limits<float>::lowest());
+ const auto padding = _attributes.pad();
+ const auto stride = _attributes.stride();
+ const auto pool_size = _attributes.pool_size();
+ const auto data_type = _src->data_type();
+ const auto use_fp_mixed_precision = (_src->data_type() == DataType::F16) && _settings.mixed_precision() &&
+ _attributes.pool_type() != PoolingType::MAX;
+ const std::string max_initial_value =
+ _settings.use_inf_as_limit() ? "(-INFINITY)"
+ : float_to_string_with_full_precision(std::numeric_limits<float>::lowest());
// pool specific
lut["STRIDE_X"] = stride.x();
@@ -407,7 +404,8 @@ TagLUT ClTemplatePool2d::get_tag_lut(const GpuKernelVariableTable &vtable, const
lut["POOL_SIZE_Y"] = pool_size.height;
// Datatypes and variables
- lut["ACC_DATA_TYPE"] = get_cl_type_from_data_type((use_fp_mixed_precision) ? (DataType::F32) : (data_type)); // Type of accumulators to use.
+ lut["ACC_DATA_TYPE"] = get_cl_type_from_data_type(
+ (use_fp_mixed_precision) ? (DataType::F32) : (data_type)); // Type of accumulators to use.
lut["DATA_TYPE"] = get_cl_type_from_data_type(data_type);
lut["SRC_WIDTH"] = _src->dimension(width_idx);
lut["SRC_HEIGHT"] = _src->dimension(height_idx);
@@ -454,14 +452,14 @@ std::string ClTemplatePool2d::get_config_id() const
std::set<std::string> ClTemplatePool2d::get_headers_list() const
{
- return std::set<std::string>{ "helpers.h", "tile_helpers.h", "repeat.h" };
+ return std::set<std::string>{"helpers.h", "tile_helpers.h", "repeat.h"};
}
Window ClTemplatePool2d::get_window() const
{
ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
const auto output_shape = _dst->tensor_shape();
- const unsigned int vec_size = adjust_vec_size(((_dst->data_type() == DataType::F32) ? 2 : 4), _dst->dimension(0));
+ const unsigned int vec_size = adjust_vec_size(((_dst->data_type() == DataType::F32) ? 2 : 4), _dst->dimension(0));
// Create and configure kernel window
auto win = calculate_max_window(output_shape, Steps(vec_size));
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h
index ef1c100f44..d1d3c01669 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h
@@ -27,6 +27,7 @@
#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp
index 8b50f1e209..c882353fcb 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp
@@ -25,6 +25,7 @@
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/helpers/WindowHelpers.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
@@ -36,11 +37,8 @@ namespace dynamic_fusion
{
constexpr unsigned int vector_size_byte_opencl = 16;
-ClTemplateReshape::ClTemplateReshape(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors)
- : IGpuTemplateComponentWriter{ id, tensors },
- _src{},
- _dst{}
+ClTemplateReshape::ClTemplateReshape(ComponentId id, const ArgumentPack<ITensorInfo> &tensors)
+ : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
@@ -97,23 +95,17 @@ TILE(uint, M0, 1, g_dst_indirect_y);
void ClTemplateReshape::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
{
- vtable.declare_variable(
- comp_group,
- _src,
- GpuKernelArgumentInfo(common_tensor_type), // GpuKernelArgumentInfo::Type::Image_3D
- "src");
-
- vtable.declare_variable(
- comp_group,
- _dst,
- GpuKernelArgumentInfo(common_tensor_type),
- "dst");
+ vtable.declare_variable(comp_group, _src,
+ GpuKernelArgumentInfo(common_tensor_type), // GpuKernelArgumentInfo::Type::Image_3D
+ "src");
+
+ vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(common_tensor_type), "dst");
}
TagLUT ClTemplateReshape::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
{
ARM_COMPUTE_UNUSED(comp_group);
- TagLUT lut{};
+ TagLUT lut{};
// Arguments and global shared variables
lut["src"] = vtable.get_variable(_src);
@@ -153,7 +145,7 @@ std::string ClTemplateReshape::get_config_id() const
std::set<std::string> ClTemplateReshape::get_headers_list() const
{
- return std::set<std::string>{ "helpers.h", "tile_helpers.h" };
+ return std::set<std::string>{"helpers.h", "tile_helpers.h"};
}
Window ClTemplateReshape::get_window() const
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h
index 56b6585b61..838a21db6d 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h
@@ -25,6 +25,7 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATERESHAPE
#include "arm_compute/core/experimental/Types.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
@@ -42,8 +43,7 @@ public:
* @param[in] id Component id
* @param[in] tensors Tensor arguments to the components
*/
- ClTemplateReshape(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors);
+ ClTemplateReshape(ComponentId id, const ArgumentPack<ITensorInfo> &tensors);
/** Prevent instances of this class from being copy constructed */
ClTemplateReshape(const ClTemplateReshape &reshape) = delete;
/** Prevent instances of this class from being copied */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp
index aaed1d990d..846c712ceb 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp
@@ -27,6 +27,7 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/helpers/WindowHelpers.h"
#include "src/core/utils/ScaleUtils.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
@@ -37,8 +38,10 @@ namespace experimental
{
namespace dynamic_fusion
{
-ClTemplateResize::ClTemplateResize(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const ClTemplateResize::Attributes &attributes)
- : IGpuTemplateComponentWriter{ id, tensors }, _src{}, _dst{}, _attributes{ attributes }
+ClTemplateResize::ClTemplateResize(ComponentId id,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const ClTemplateResize::Attributes &attributes)
+ : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}, _attributes{attributes}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
@@ -63,9 +66,9 @@ TILE(uint, 1, 1, g_dst_indirect_y);
const int bout = g_ind_2 / {{arg_dst}}_h;
)_";
- if(_attributes.interpolation_policy() == InterpolationPolicy::NEAREST_NEIGHBOR)
+ if (_attributes.interpolation_policy() == InterpolationPolicy::NEAREST_NEIGHBOR)
{
- if(_attributes.sampling_policy() == SamplingPolicy::TOP_LEFT)
+ if (_attributes.sampling_policy() == SamplingPolicy::TOP_LEFT)
{
code += R"_(
float xi_f = (g_ind_1 * {{SCALE_X}});
@@ -80,7 +83,7 @@ TILE(uint, 1, 1, g_dst_indirect_y);
)_";
}
- if(_attributes.align_corners())
+ if (_attributes.align_corners())
{
code += R"_(
xi_f = round(xi_f);
@@ -95,9 +98,9 @@ TILE(uint, 1, 1, g_dst_indirect_y);
T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi0, xi0, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, {{dst}});
)_";
}
- else if(_attributes.interpolation_policy() == InterpolationPolicy::BILINEAR)
+ else if (_attributes.interpolation_policy() == InterpolationPolicy::BILINEAR)
{
- if(_attributes.sampling_policy() == SamplingPolicy::TOP_LEFT)
+ if (_attributes.sampling_policy() == SamplingPolicy::TOP_LEFT)
{
code += R"_(
float xi_f = (g_ind_1 * {{SCALE_X}});
@@ -137,7 +140,7 @@ TILE(uint, 1, 1, g_dst_indirect_y);
T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi1, xi1, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, in11);
)_";
- if(is_data_type_float(_src->data_type()))
+ if (is_data_type_float(_src->data_type()))
{
code += R"_(
const {{SRC_DATA_TYPE}} a = ({{SRC_DATA_TYPE}})(xi_f - (float)xi);
@@ -158,9 +161,9 @@ TILE(uint, 1, 1, g_dst_indirect_y);
const float b1 = (1.f - a1);
{{dst}}[0].v = CONVERT_SAT(
- (CONVERT(in00[0].v, VEC_DATA_TYPE(float, N0)) * b * b1) +
+ (CONVERT(in00[0].v, VEC_DATA_TYPE(float, N0)) * b * b1) +
(CONVERT(in01[0].v, VEC_DATA_TYPE(float, N0)) * a * b1) +
- (CONVERT(in10[0].v, VEC_DATA_TYPE(float, N0)) * b * a1) +
+ (CONVERT(in10[0].v, VEC_DATA_TYPE(float, N0)) * b * a1) +
(CONVERT(in11[0].v, VEC_DATA_TYPE(float, N0)) * a * a1), VEC_DATA_TYPE({{DST_DATA_TYPE}}, N0));
)_";
}
@@ -179,22 +182,18 @@ TILE(uint, 1, 1, g_dst_indirect_y);
return code;
}
-void ClTemplateResize::declare_variables(GpuKernelVariableTable &vtable, const IGpuTemplateComponentWriter::ComponentGroup &comp_group) const
+void ClTemplateResize::declare_variables(GpuKernelVariableTable &vtable,
+ const IGpuTemplateComponentWriter::ComponentGroup &comp_group) const
{
- vtable.declare_variable(
- comp_group,
- _src,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "src");
-
- vtable.declare_variable(
- comp_group,
- _dst,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "dst");
+ vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "src");
+
+ vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "dst");
}
-TagLUT ClTemplateResize::get_tag_lut(const GpuKernelVariableTable &vtable, const IGpuTemplateComponentWriter::ComponentGroup &comp_group) const
+TagLUT ClTemplateResize::get_tag_lut(const GpuKernelVariableTable &vtable,
+ const IGpuTemplateComponentWriter::ComponentGroup &comp_group) const
{
TagLUT lut{};
@@ -212,8 +211,10 @@ TagLUT ClTemplateResize::get_tag_lut(const GpuKernelVariableTable &vtable, const
lut["DST_DATA_TYPE"] = get_cl_type_from_data_type(_dst->data_type());
lut["CONSTANT_VALUE"] = string_from_pixel_value(0, _src->data_type());
- const float scale_x = scale_utils::calculate_resize_ratio(_src->dimension(1), _dst->dimension(1), _attributes.align_corners());
- const float scale_y = scale_utils::calculate_resize_ratio(_src->dimension(2), _dst->dimension(2), _attributes.align_corners());
+ const float scale_x =
+ scale_utils::calculate_resize_ratio(_src->dimension(1), _dst->dimension(1), _attributes.align_corners());
+ const float scale_y =
+ scale_utils::calculate_resize_ratio(_src->dimension(2), _dst->dimension(2), _attributes.align_corners());
lut["SCALE_X"] = float_to_string_with_full_precision(scale_x);
lut["SCALE_Y"] = float_to_string_with_full_precision(scale_y);
@@ -242,7 +243,8 @@ std::string ClTemplateResize::get_config_id() const
std::string config_id{};
config_id += "resize_";
- config_id += (_attributes.interpolation_policy() == InterpolationPolicy::NEAREST_NEIGHBOR ? "NEAREST_NEIGHBOR" : "");
+ config_id +=
+ (_attributes.interpolation_policy() == InterpolationPolicy::NEAREST_NEIGHBOR ? "NEAREST_NEIGHBOR" : "");
config_id += (_attributes.interpolation_policy() == InterpolationPolicy::BILINEAR ? "BILINEAR" : "");
config_id += "_";
config_id += (_attributes.sampling_policy() == SamplingPolicy::CENTER ? "center" : "topleft");
@@ -260,7 +262,7 @@ std::string ClTemplateResize::get_config_id() const
std::set<std::string> ClTemplateResize::get_headers_list() const
{
- return std::set<std::string>{ "helpers.h", "tile_helpers.h" };
+ return std::set<std::string>{"helpers.h", "tile_helpers.h"};
}
Window ClTemplateResize::get_window() const
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp
index 217214ced3..d0ec91e0a9 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp
@@ -32,7 +32,7 @@ namespace experimental
namespace dynamic_fusion
{
ClTemplateStore::ClTemplateStore(ComponentId id, const ArgumentPack<ITensorInfo> &tensors)
- : IGpuTemplateComponentWriter{ id, tensors }, _src{}, _dst{}
+ : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
@@ -61,16 +61,10 @@ std::string ClTemplateStore::get_component_code(const ComponentGroup &comp_group
void ClTemplateStore::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
{
- vtable.declare_variable(
- comp_group,
- _src,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "src");
- vtable.declare_variable(
- comp_group,
- _dst,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "dst");
+ vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "src");
+ vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "dst");
}
TagLUT ClTemplateStore::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h
index 3f97a82204..b8c82ceadd 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h
@@ -25,6 +25,7 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATESTORE
#include "arm_compute/core/experimental/Types.h"
+
#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp
index eda15f1d95..d3d7c8db83 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp
@@ -24,6 +24,7 @@
#include "ClTemplateWriter.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
@@ -39,11 +40,11 @@ std::string ClTemplateWriter::replace_tags(const std::string &code_template, con
std::string replaced_code = "";
bool scanning_pattern = false;
std::string pattern_found = "";
- for(size_t i = 0; i < code_template.size() - 1; ++i)
+ for (size_t i = 0; i < code_template.size() - 1; ++i)
{
- if(!scanning_pattern)
+ if (!scanning_pattern)
{
- if(code_template[i] == '{' && code_template[i + 1] == '{')
+ if (code_template[i] == '{' && code_template[i + 1] == '{')
{
i += 1;
scanning_pattern = true;
@@ -56,7 +57,7 @@ std::string ClTemplateWriter::replace_tags(const std::string &code_template, con
}
else
{
- if(code_template[i] == '}' && code_template[i + 1] == '}')
+ if (code_template[i] == '}' && code_template[i + 1] == '}')
{
i += 1;
scanning_pattern = false;
@@ -76,8 +77,7 @@ std::string ClTemplateWriter::replace_tags(const std::string &code_template, con
ClTemplateWriter::~ClTemplateWriter()
{
}
-ClTemplateWriter::ClTemplateWriter(const GpuKernelComponentGroup &components)
- : _components{ components }
+ClTemplateWriter::ClTemplateWriter(const GpuKernelComponentGroup &components) : _components{components}
{
}
std::string ClTemplateWriter::get_name()
@@ -91,7 +91,7 @@ std::string ClTemplateWriter::get_code()
std::string ClTemplateWriter::get_config_id()
{
std::string config_id = get_name();
- for(const auto &comp : _components)
+ for (const auto &comp : _components)
{
config_id += "--" + comp->template_writer()->get_config_id() + "--";
}
@@ -103,7 +103,7 @@ CLBuildOptions ClTemplateWriter::get_build_options()
{
CLBuildOptions build_opts{};
- for(const auto &comp : _components)
+ for (const auto &comp : _components)
{
build_opts.add_options(comp->template_writer()->get_build_options(_components).options());
}
@@ -122,11 +122,9 @@ std::map<ITensorInfo::Id, GpuKernelArgument> ClTemplateWriter::get_tensors()
{
// Assemble GpuKernelArguments
std::map<ITensorInfo::Id, GpuKernelArgument> tensors;
- for(const auto t : _components.get_argument_tensors())
+ for (const auto t : _components.get_argument_tensors())
{
- tensors.emplace(
- t->id(),
- GpuKernelArgument{ *t, _vtable.get_variable(t).kernel_argument_info });
+ tensors.emplace(t->id(), GpuKernelArgument{*t, _vtable.get_variable(t).kernel_argument_info});
}
return tensors;
}
@@ -141,22 +139,24 @@ std::string ClTemplateWriter::write_code()
std::vector<std::string> component_codes{}; // vector because order matters
// Pass 1: Declare all kernel variables
- for(auto &component : _components)
+ for (auto &component : _components)
{
component->template_writer()->declare_variables(_vtable, _components);
}
// Pass 2: Generate component codes
- for(auto &component : _components)
+ for (auto &component : _components)
{
const auto component_writer = component->template_writer();
auto curr_headers_list = component_writer->get_headers_list();
auto curr_additional_macros = component_writer->get_additional_macros();
auto curr_component_code = component_writer->get_component_code(_components);
- const auto var_lut = component_writer->get_tag_lut(_vtable, _components); // Ideally can be merged with get_component_code once we have finer-grained code generation technique
+ const auto var_lut = component_writer->get_tag_lut(
+ _vtable,
+ _components); // Ideally can be merged with get_component_code once we have finer-grained code generation technique
component_codes.push_back(replace_tags(curr_component_code, var_lut));
headers_list.insert(curr_headers_list.begin(), curr_headers_list.end());
- if(!additional_macros.empty()) // Some components might not have any
+ if (!additional_macros.empty()) // Some components might not have any
{
additional_macros.insert(replace_tags(curr_additional_macros, var_lut));
}
@@ -165,7 +165,7 @@ std::string ClTemplateWriter::write_code()
// Step 3: Assemble the data gathered by traversing the graph into the string "code"
std::string code = "";
- for(auto &header : headers_list)
+ for (auto &header : headers_list)
{
#if defined(EMBEDDED_KERNELS)
code += CLKernelLibrary::get().get_program(header).first;
@@ -174,16 +174,14 @@ std::string ClTemplateWriter::write_code()
#endif // defined(EMBEDDED_KERNELS)
}
- for(auto &macros : additional_macros)
+ for (auto &macros : additional_macros)
{
code += macros;
}
auto arguments = _components.get_argument_tensors();
- std::sort(arguments.begin(), arguments.end(), [](const ITensorInfo * l, const ITensorInfo * r)
- {
- return l->id() < r->id();
- });
+ std::sort(arguments.begin(), arguments.end(),
+ [](const ITensorInfo *l, const ITensorInfo *r) { return l->id() < r->id(); });
code += write_kernel_signature(_vtable.get_variable_list(arguments));
code += "\n{\n\n";
@@ -198,7 +196,7 @@ std::string ClTemplateWriter::write_code()
tiles_ss << " //------------------ START TILE DECLARATION ---------------------\n";
- for(auto tile : tiles)
+ for (auto tile : tiles)
{
const auto var = _vtable.get_variable(tile);
const auto data_type = get_cl_type_from_data_type(tile->data_type());
@@ -212,7 +210,7 @@ std::string ClTemplateWriter::write_code()
code += tiles_ss.str();
}
- for(const auto &component_code : component_codes)
+ for (const auto &component_code : component_codes)
{
code += component_code;
code += "\n";
@@ -231,7 +229,8 @@ std::string ClTemplateWriter::write_global_section() const
auto leftover_w = dst_w % tile_w;
std::string code = "";
- code += std::string(" int g_ind_0 = GET_SPATIAL_IDX(0, ") + std::to_string(tile_w) + ", " + std::to_string(leftover_w) + ");\n";
+ code += std::string(" int g_ind_0 = GET_SPATIAL_IDX(0, ") + std::to_string(tile_w) + ", " +
+ std::to_string(leftover_w) + ");\n";
code += std::string(" int g_ind_1 = GET_SPATIAL_IDX(1, ") + std::to_string(tile_h) + ", " + "0);\n";
code += std::string(" int g_ind_2 = GET_SPATIAL_IDX(2, 1, 0);\n\n");
@@ -243,7 +242,7 @@ std::string ClTemplateWriter::write_global_section() const
std::string ClTemplateWriter::write_argument_declaration(const GpuKernelVariableTable::TensorVariable &var) const
{
std::string code;
- switch(var.kernel_argument_info.type)
+ switch (var.kernel_argument_info.type)
{
case GpuKernelArgumentInfo::Type::Vector:
{
@@ -293,11 +292,11 @@ std::string ClTemplateWriter::write_kernel_signature(const GpuKernelVariableTabl
{
std::string code = "\n__kernel void " + write_kernel_name() + "(";
- for(int i = 0; i < static_cast<int>(argument_list.size()) - 1; ++i)
+ for (int i = 0; i < static_cast<int>(argument_list.size()) - 1; ++i)
{
code += write_argument_declaration(argument_list[i]) + ",";
}
- if(static_cast<int>(argument_list.size()) - 1 >= 0)
+ if (static_cast<int>(argument_list.size()) - 1 >= 0)
{
code += write_argument_declaration(argument_list[argument_list.size() - 1]);
}
@@ -308,12 +307,12 @@ std::string ClTemplateWriter::write_kernel_signature(const GpuKernelVariableTabl
}
std::string ClTemplateWriter::write_kernel_name() const
{
- if(_components.empty())
+ if (_components.empty())
{
return "empty_kernel";
}
std::string name = _components.empty() ? "" : _components[0]->template_writer()->get_name();
- for(size_t i = 1; i < _components.size(); ++i)
+ for (size_t i = 1; i < _components.size(); ++i)
{
name += "___";
name += _components[i]->template_writer()->get_name();