diff options
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp')
-rw-r--r-- | src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp | 128 |
1 files changed, 73 insertions, 55 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp index 15e32e26d5..c8bf999261 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp @@ -25,6 +25,7 @@ #include "arm_compute/core/Error.h" #include "arm_compute/core/Validate.h" +#include "arm_compute/core/utils/StringUtils.h" #include "arm_compute/core/utils/helpers/AdjustVecSize.h" #include "ckw/TensorTileSampler.h" #include "ckw/types/TensorSamplerTypes.h" @@ -35,6 +36,11 @@ #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/ElementwiseBinary.h" +#include "src/dynamic_fusion/sketch/gpu/components/utils/type_printer/ElementwiseBinary.h" +#include "support/StringSupport.h" +#include <algorithm> #include <string> using namespace ckw; @@ -44,57 +50,15 @@ namespace experimental { namespace dynamic_fusion { -namespace -{ -/** Create a simple sampler from tile of dimension [m0, n0] - */ -inline TensorTileSampler create_simple_sampler(GpuCkwScopedKernelWriter &writer, int32_t m0, int32_t n0) -{ - TensorTileSampler sampler; - - auto &gid_0 = writer->declare_tile("gid_0", ckw::DataType::Int32); - auto &gid_1 = writer->declare_tile("gid_1", ckw::DataType::Int32); - auto &gid_2 = writer->declare_tile("gid_2", ckw::DataType::Int32); - - writer->op_get_global_id(gid_0, 0); - writer->op_get_global_id(gid_1, 1); - writer->op_get_global_id(gid_2, 2); - - auto &x_coord = writer->declare_tile("x_coord", ckw::DataType::Int32); - auto &y_coord = writer->declare_tile("y_coord", ckw::DataType::Int32); - auto &m0_t = writer->declare_tile("m0", m0); - auto &n0_t = writer->declare_tile("n0", n0); - writer->op_binary_expression(x_coord, gid_0, ckw::BinaryOp::Mul, n0_t); - writer->op_binary_expression(y_coord, gid_1, ckw::BinaryOp::Mul, m0_t); - - sampler.x(x_coord); - sampler.y(y_coord); - auto &const_0 = writer->declare_tile("0", 0); - sampler.z(const_0); // 3rd dimension collapsed with 2nd dimension - sampler.b(gid_2); - - sampler.width(n0); - sampler.height(m0); - - sampler.format(TensorSamplerFormat::C_WH_1); // 3rd dimension collapsed with 2nd dimension - sampler.address_mode_x(TensorSamplerAddressModeX::None); - sampler.address_mode_y(TensorSamplerAddressModeY::ClampToBorder); - sampler.address_mode_z(TensorSamplerAddressModeZ::Skip); // Dimensions higher than 3 not supported yet - - return sampler; -} -} // namespace - GpuCkwElementwiseBinary::GpuCkwElementwiseBinary(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes) : IGpuCkwComponentDriver{ id, tensors }, _lhs{}, _rhs{}, - _dst{} + _dst{}, + _attributes{ attributes } { - ARM_COMPUTE_UNUSED(attributes); - _lhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); _rhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_1); _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); @@ -103,32 +67,60 @@ GpuCkwElementwiseBinary::GpuCkwElementwiseBinary(ComponentId void GpuCkwElementwiseBinary::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const { - const auto root_window = comp_group.get_root_component()->ckw_component_driver()->get_window(); - const unsigned int n0 = root_window.x().step(); - const unsigned int m0 = root_window.y().step(); + const auto root_window = comp_group.get_root_component()->ckw_component_driver()->get_window(); + const auto n0 = static_cast<int32_t>(root_window.x().step()); + const auto m0 = static_cast<int32_t>(root_window.y().step()); GpuCkwComponentArgument *lhs = vtable.declare_variable(comp_group, writer, _lhs, TensorStorageType::ClBufferUint8Ptr, "lhs"); GpuCkwComponentArgument *rhs = vtable.declare_variable(comp_group, writer, _rhs, TensorStorageType::ClBufferUint8Ptr, "rhs"); GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst"); - // Load the LHS and RHS tiles and prepare the tensor sampler. - load_lhs_rhs_tiles_and_prepare_sampler(writer, lhs, rhs, m0, n0, create_simple_sampler); + auto &gid_0 = writer->declare_tile("gid_0", ckw::DataType::Int32); + auto &gid_1 = writer->declare_tile("gid_1", ckw::DataType::Int32); + auto &gid_2 = writer->declare_tile("gid_2", ckw::DataType::Int32); + + writer->op_get_global_id(gid_0, 0); + writer->op_get_global_id(gid_1, 1); + writer->op_get_global_id(gid_2, 2); + + auto &const_0 = writer->declare_tile("0", 0); + + // Load the LHS and RHS tiles + if(!lhs->has_tile()) + { + auto sampler = create_boundary_aware_2d_sampler(writer, gid_0, gid_1, _lhs->dimension(0), _lhs->dimension(1), n0, m0, "lhs_", const_0); + sampler.format(TensorSamplerFormat::C_WH_1); // 3rd dimension collapsed with 2nd dimension + sampler.z(const_0); + sampler.b(gid_2); + writer->op_load_once(lhs, sampler); + } + if(!rhs->has_tile()) + { + auto sampler = create_boundary_aware_2d_sampler(writer, gid_0, gid_1, _rhs->dimension(0), _rhs->dimension(1), n0, m0, "rhs_", const_0); + sampler.format(TensorSamplerFormat::C_WH_1); // 3rd dimension collapsed with 2nd dimension + sampler.z(const_0); + sampler.b(gid_2); + writer->op_load_once(rhs, sampler); + } - auto &lhs_tile = lhs->tile(); - auto &rhs_tile = rhs->tile(); - const auto &sampler = lhs->tile_sampler(); + auto dst_sampler = create_boundary_aware_2d_sampler(writer, gid_0, gid_1, _dst->dimension(0), _dst->dimension(1), n0, m0, "dst_", const_0); + dst_sampler.format(TensorSamplerFormat::C_WH_1); // 3rd dimension collapsed with 2nd dimension + dst_sampler.z(const_0); + dst_sampler.b(gid_2); // Prepare the output tile. if(!dst->has_tile()) { - auto &tile = writer->declare_tile("dst_tile", lhs_tile.tile_info()); - dst->init_virtual_tensor(tile, sampler); + auto &tile = writer->declare_tile("dst_tile", ckw::TileInfo(to_ckw(_dst->data_type()), dst_sampler.height(), dst_sampler.width())); + dst->init_virtual_tensor(tile, dst_sampler); } + auto &lhs_tile = lhs->tile(); + auto &rhs_tile = rhs->tile(); auto &dst_tile = dst->tile(); // Perform the operation. - writer->op_binary_expression(dst_tile, lhs_tile, BinaryOp::Add, rhs_tile); + writer->op_binary_expression(dst_tile, lhs_tile, to_ckw(_attributes), rhs_tile); } Window GpuCkwElementwiseBinary::get_window() const @@ -146,6 +138,32 @@ Window GpuCkwElementwiseBinary::get_window() const return win; } +std::string GpuCkwElementwiseBinary::get_name(const ComponentGroup &comp_group) const +{ + ARM_COMPUTE_UNUSED(comp_group); + const std::vector<std::string> build_params = + { + "elementwise_binary", + "op", to_string(_attributes.operation()), + "dt", lower_string(string_from_data_type(_dst->data_type())), + }; + return join(build_params, "_"); +} + +std::string GpuCkwElementwiseBinary::get_tuner_id(const ComponentGroup &comp_group) const +{ + ARM_COMPUTE_UNUSED(comp_group); + /// NOTE: Hardcoded for now, the parameters should ideally be exported by ckw (a selection of constant tiles) + std::vector<std::string> build_params = + { + "elementwise_binary", + "op", to_string(_attributes.operation()), + "dt", lower_string(string_from_data_type(_dst->data_type())), + "dst_dim0", support::cpp11::to_string(_dst->dimension(0)), + "dst_dim1", support::cpp11::to_string(_dst->dimension(1)), + }; + return join(build_params, "_"); +} } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute |