aboutsummaryrefslogtreecommitdiff
path: root/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.cpp')
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.cpp171
1 files changed, 90 insertions, 81 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.cpp
index 9c9a298132..8ab3ec3a55 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.cpp
@@ -24,17 +24,18 @@
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
+#include "arm_compute/core/Validate.h"
#include "ckw/TensorTileSampler.h"
+
#include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
using namespace ckw;
@@ -48,11 +49,7 @@ GpuCkwPool2d::GpuCkwPool2d(ComponentId id,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes,
const Settings &settings)
- : IGpuCkwComponentDriver{ id, tensors },
- _src{},
- _dst{},
- _attributes{ attributes },
- _settings{ settings }
+ : IGpuCkwComponentDriver{id, tensors}, _src{}, _dst{}, _attributes{attributes}, _settings{settings}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
@@ -60,14 +57,18 @@ GpuCkwPool2d::GpuCkwPool2d(ComponentId id,
ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
}
-void GpuCkwPool2d::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
+void GpuCkwPool2d::write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const
{
const auto root_window = comp_group.get_root_component()->ckw_component_driver()->get_window();
const unsigned int n0 = root_window.x().step();
const unsigned int m0 = root_window.y().step();
- GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
- GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
+ GpuCkwComponentArgument *src =
+ vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
+ GpuCkwComponentArgument *dst =
+ vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
TileOperand &gid_0 = writer->declare_tile("gid_0", ckw::DataType::Int32);
TileOperand &gid_1 = writer->declare_tile("gid_1", ckw::DataType::Int32);
@@ -90,23 +91,26 @@ void GpuCkwPool2d::write_component_code(const ComponentGroup &comp_group, GpuCkw
const auto src_data_type = _src->data_type();
// Check if this is global pooling path
- const bool is_global_pooling = (pool_size_x == src_width) && (pool_size_y == src_height) && (pad_x == 0) && (pad_y == 0);
+ const bool is_global_pooling =
+ (pool_size_x == src_width) && (pool_size_y == src_height) && (pad_x == 0) && (pad_y == 0);
// Check if this a case of FP_MIXED_PRECISION
- const bool use_fp_mixed_precision = (src_data_type == DataType::F16) && _settings.mixed_precision() && _attributes.pool_type() != PoolingType::MAX;
- const auto acc_data_type = (use_fp_mixed_precision) ? (DataType::F32) : (src_data_type);
+ const bool use_fp_mixed_precision =
+ (src_data_type == DataType::F16) && _settings.mixed_precision() && _attributes.pool_type() != PoolingType::MAX;
+ const auto acc_data_type = (use_fp_mixed_precision) ? (DataType::F32) : (src_data_type);
TileOperand &const_0 = writer->declare_tile("0", 0);
const TileOperand &const_1 = writer->declare_tile("1", 1);
const TileOperand &const_lowest_value = writer->declare_tile("LOWEST_VALUE", std::numeric_limits<float>::lowest());
const TileOperand &pool_size_x_tile = writer->declare_tile("POOL_SIZE_X", pool_size_x);
const TileOperand &pool_size_y_tile = writer->declare_tile("POOL_SIZE_Y", pool_size_y);
- const TileOperand &stride_x_tile = writer->declare_tile("STRIDE_X", static_cast<int32_t>(_attributes.stride().x()));
- const TileOperand &stride_y_tile = writer->declare_tile("STRIDE_Y", static_cast<int32_t>(_attributes.stride().y()));
- const TileOperand &pad_x_tile = writer->declare_tile("PAD_X", pad_x);
- const TileOperand &pad_y_tile = writer->declare_tile("PAD_Y", pad_y);
- const TileOperand &dst_height_tile = writer->declare_tile("DST_HEIGHT", static_cast<int32_t>(_dst->dimension(height_idx)));
- const TileOperand &src_height_tile = writer->declare_tile("SRC_HEIGHT", src_height);
- const TileOperand &src_width_tile = writer->declare_tile("SRC_WIDTH", src_width);
+ const TileOperand &stride_x_tile = writer->declare_tile("STRIDE_X", static_cast<int32_t>(_attributes.stride().x()));
+ const TileOperand &stride_y_tile = writer->declare_tile("STRIDE_Y", static_cast<int32_t>(_attributes.stride().y()));
+ const TileOperand &pad_x_tile = writer->declare_tile("PAD_X", pad_x);
+ const TileOperand &pad_y_tile = writer->declare_tile("PAD_Y", pad_y);
+ const TileOperand &dst_height_tile =
+ writer->declare_tile("DST_HEIGHT", static_cast<int32_t>(_dst->dimension(height_idx)));
+ const TileOperand &src_height_tile = writer->declare_tile("SRC_HEIGHT", src_height);
+ const TileOperand &src_width_tile = writer->declare_tile("SRC_WIDTH", src_width);
TileOperand &idx_out_n = writer->declare_tile("idx_out_n", ckw::DataType::Int32);
TileOperand &idx_out_h = writer->declare_tile("idx_out_h", ckw::DataType::Int32);
@@ -145,7 +149,7 @@ void GpuCkwPool2d::write_component_code(const ComponentGroup &comp_group, GpuCkw
// Prepare dst tensor and tile
TileInfo dst_tile_info = TileInfo(to_ckw(src_data_type), m0, n0);
- if(!dst->has_tile())
+ if (!dst->has_tile())
{
TileOperand &dst_tile = writer->declare_tile("dst_tile", dst_tile_info);
dst->init_virtual_tensor(dst_tile, dst_sampler);
@@ -156,14 +160,15 @@ void GpuCkwPool2d::write_component_code(const ComponentGroup &comp_group, GpuCkw
const TileOperand &res_tile = writer->declare_tile("res_tile", TileInfo(to_ckw(acc_data_type), m0, n0));
// Initialise result tile with appropriate value
- if(_attributes.pool_type() == PoolingType::MAX)
+ if (_attributes.pool_type() == PoolingType::MAX)
{
- if(_settings.use_inf_as_limit())
+ if (_settings.use_inf_as_limit())
{
TileContainer minus_inf_tile_container;
std::vector<std::string> value = std::vector<std::string>(n0, "(-INFINITY)");
- minus_inf_tile_container.push_back({ value });
- const TileOperand &minus_inf = writer->declare_tile("minus_inf_const", minus_inf_tile_container, to_ckw(acc_data_type));
+ minus_inf_tile_container.push_back({value});
+ const TileOperand &minus_inf =
+ writer->declare_tile("minus_inf_const", minus_inf_tile_container, to_ckw(acc_data_type));
writer->op_assign(res_tile, minus_inf);
}
else
@@ -209,7 +214,7 @@ void GpuCkwPool2d::write_component_code(const ComponentGroup &comp_group, GpuCkw
writer->op_binary_elementwise_function(pool_y_e, BinaryFunction::Min, pool_size_y_tile, pool_y_e);
const TileOperand &filter_size = writer->declare_tile("filter_size", ckw::DataType::Int32);
- if(_attributes.exclude_padding())
+ if (_attributes.exclude_padding())
{
const TileOperand &y_diff = writer->declare_tile("y_diff", ckw::DataType::Int32);
const TileOperand &x_diff = writer->declare_tile("x_diff", ckw::DataType::Int32);
@@ -227,7 +232,7 @@ void GpuCkwPool2d::write_component_code(const ComponentGroup &comp_group, GpuCkw
const TileOperand &x = writer->declare_tile("x", ckw::DataType::Int32);
const TileOperand &y = writer->declare_tile("y", ckw::DataType::Int32);
- if(is_global_pooling)
+ if (is_global_pooling)
{
writer->op_assign(x, const_0);
writer->op_assign(y, const_0);
@@ -242,76 +247,80 @@ void GpuCkwPool2d::write_component_code(const ComponentGroup &comp_group, GpuCkw
}
// Y dim for-loop
- writer->op_for_loop(y, BinaryOp::Less, pool_y_e, y, AssignmentOp::Increment, const_1, [&]()
- {
- // Reset the iterator for the inner loop
- if(is_global_pooling)
- {
- writer->op_assign(x, const_0);
- }
- else
+ writer->op_for_loop(
+ y, BinaryOp::Less, pool_y_e, y, AssignmentOp::Increment, const_1,
+ [&]()
{
- writer->op_assign(x, pool_x_s);
- }
-
- TileOperand &a_y = writer->declare_tile("a_y", ckw::DataType::Int32);
- writer->op_binary_expression(a_y, idx_in_h, BinaryOp::Add, y);
-
- // X dim for-loop
- writer->op_for_loop(x, BinaryOp::Less, pool_x_e, x, AssignmentOp::Increment, const_1, [&]()
- {
- TileOperand &a_x = writer->declare_tile("a_x", ckw::DataType::Int32);
- writer->op_binary_expression(a_x, idx_in_w, BinaryOp::Add, x);
-
- TileOperand &src_tile = writer->declare_tile("src_tile", TileInfo(to_ckw(acc_data_type), m0, n0));
-
- src_sampler.y(a_x);
- src_sampler.z(a_y);
-
- // Load src tile
- if(use_fp_mixed_precision)
+ // Reset the iterator for the inner loop
+ if (is_global_pooling)
{
- TileOperand &src_uncasted_tile = writer->declare_tile("uncasted_src_tile", dst_tile_info);
- writer->op_load(src_uncasted_tile, src->tensor(), src_sampler);
- writer->op_cast_expression(src_tile, src_uncasted_tile, ckw::ConvertPolicy::None);
+ writer->op_assign(x, const_0);
}
else
{
- writer->op_load(src_tile, src->tensor(), src_sampler);
+ writer->op_assign(x, pool_x_s);
}
- // Take the square of the input, for L2 Pooling
- if(_attributes.pool_type() == PoolingType::L2)
- {
- writer->op_binary_expression(src_tile, src_tile, BinaryOp::Mul, src_tile);
- }
-
- // Perfom Pooling op
- if(_attributes.pool_type() == PoolingType::MAX)
- {
- writer->op_binary_elementwise_function(res_tile, BinaryFunction::Max, res_tile, src_tile);
- }
- else
- {
- writer->op_binary_expression(res_tile, res_tile, BinaryOp::Add, src_tile);
- }
+ TileOperand &a_y = writer->declare_tile("a_y", ckw::DataType::Int32);
+ writer->op_binary_expression(a_y, idx_in_h, BinaryOp::Add, y);
+
+ // X dim for-loop
+ writer->op_for_loop(
+ x, BinaryOp::Less, pool_x_e, x, AssignmentOp::Increment, const_1,
+ [&]()
+ {
+ TileOperand &a_x = writer->declare_tile("a_x", ckw::DataType::Int32);
+ writer->op_binary_expression(a_x, idx_in_w, BinaryOp::Add, x);
+
+ TileOperand &src_tile = writer->declare_tile("src_tile", TileInfo(to_ckw(acc_data_type), m0, n0));
+
+ src_sampler.y(a_x);
+ src_sampler.z(a_y);
+
+ // Load src tile
+ if (use_fp_mixed_precision)
+ {
+ TileOperand &src_uncasted_tile = writer->declare_tile("uncasted_src_tile", dst_tile_info);
+ writer->op_load(src_uncasted_tile, src->tensor(), src_sampler);
+ writer->op_cast_expression(src_tile, src_uncasted_tile, ckw::ConvertPolicy::None);
+ }
+ else
+ {
+ writer->op_load(src_tile, src->tensor(), src_sampler);
+ }
+
+ // Take the square of the input, for L2 Pooling
+ if (_attributes.pool_type() == PoolingType::L2)
+ {
+ writer->op_binary_expression(src_tile, src_tile, BinaryOp::Mul, src_tile);
+ }
+
+ // Perfom Pooling op
+ if (_attributes.pool_type() == PoolingType::MAX)
+ {
+ writer->op_binary_elementwise_function(res_tile, BinaryFunction::Max, res_tile, src_tile);
+ }
+ else
+ {
+ writer->op_binary_expression(res_tile, res_tile, BinaryOp::Add, src_tile);
+ }
+ });
});
- });
- if((_attributes.pool_type() == PoolingType::AVG) || (_attributes.pool_type() == PoolingType::L2))
+ if ((_attributes.pool_type() == PoolingType::AVG) || (_attributes.pool_type() == PoolingType::L2))
{
// filter_size is automatically broadcasted in the operation
writer->op_binary_expression(res_tile, res_tile, BinaryOp::Div, filter_size);
}
// Take square root of the result in L2 pooling
- if(_attributes.pool_type() == PoolingType::L2)
+ if (_attributes.pool_type() == PoolingType::L2)
{
writer->op_unary_elementwise_function(res_tile, UnaryFunction::Sqrt, res_tile);
}
// Store the results and do casting if FP_MIXED_PRECISION
- if(use_fp_mixed_precision)
+ if (use_fp_mixed_precision)
{
writer->op_cast_expression(dst_tile, res_tile, ckw::ConvertPolicy::None);
}
@@ -326,7 +335,7 @@ Window GpuCkwPool2d::get_window() const
ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
TensorShape output_shape = _dst->tensor_shape();
- const unsigned int vec_size = adjust_vec_size(((_dst->data_type() == DataType::F32) ? 2 : 4), _dst->dimension(0));
+ const unsigned int vec_size = adjust_vec_size(((_dst->data_type() == DataType::F32) ? 2 : 4), _dst->dimension(0));
// Create and configure kernel window
auto win = calculate_max_window(output_shape, Steps(vec_size));
win = win.collapse_if_possible(win, Window::DimZ); // collapse window on batch size.