diff options
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp')
-rw-r--r-- | src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp | 84 |
1 files changed, 48 insertions, 36 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp index c8bf999261..2935ba45ea 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp @@ -24,22 +24,24 @@ #include "GpuCkwElementwiseBinary.h" #include "arm_compute/core/Error.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/StringUtils.h" #include "arm_compute/core/utils/helpers/AdjustVecSize.h" +#include "arm_compute/core/utils/StringUtils.h" +#include "arm_compute/core/Validate.h" #include "ckw/TensorTileSampler.h" #include "ckw/types/TensorSamplerTypes.h" + #include "src/core/helpers/WindowHelpers.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/ElementwiseBinary.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h" -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h" -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h" -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/ElementwiseBinary.h" #include "src/dynamic_fusion/sketch/gpu/components/utils/type_printer/ElementwiseBinary.h" +#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" +#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" #include "support/StringSupport.h" + #include <algorithm> #include <string> @@ -53,11 +55,7 @@ namespace dynamic_fusion GpuCkwElementwiseBinary::GpuCkwElementwiseBinary(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes) - : IGpuCkwComponentDriver{ id, tensors }, - _lhs{}, - _rhs{}, - _dst{}, - _attributes{ attributes } + : IGpuCkwComponentDriver{id, tensors}, _lhs{}, _rhs{}, _dst{}, _attributes{attributes} { _lhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); _rhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_1); @@ -65,15 +63,20 @@ GpuCkwElementwiseBinary::GpuCkwElementwiseBinary(ComponentId ARM_COMPUTE_ERROR_ON_NULLPTR(_lhs, _rhs, _dst); } -void GpuCkwElementwiseBinary::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const +void GpuCkwElementwiseBinary::write_component_code(const ComponentGroup &comp_group, + GpuCkwVariableTable &vtable, + GpuCkwScopedKernelWriter writer) const { const auto root_window = comp_group.get_root_component()->ckw_component_driver()->get_window(); const auto n0 = static_cast<int32_t>(root_window.x().step()); const auto m0 = static_cast<int32_t>(root_window.y().step()); - GpuCkwComponentArgument *lhs = vtable.declare_variable(comp_group, writer, _lhs, TensorStorageType::ClBufferUint8Ptr, "lhs"); - GpuCkwComponentArgument *rhs = vtable.declare_variable(comp_group, writer, _rhs, TensorStorageType::ClBufferUint8Ptr, "rhs"); - GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst"); + GpuCkwComponentArgument *lhs = + vtable.declare_variable(comp_group, writer, _lhs, TensorStorageType::ClBufferUint8Ptr, "lhs"); + GpuCkwComponentArgument *rhs = + vtable.declare_variable(comp_group, writer, _rhs, TensorStorageType::ClBufferUint8Ptr, "rhs"); + GpuCkwComponentArgument *dst = + vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst"); auto &gid_0 = writer->declare_tile("gid_0", ckw::DataType::Int32); auto &gid_1 = writer->declare_tile("gid_1", ckw::DataType::Int32); @@ -86,32 +89,36 @@ void GpuCkwElementwiseBinary::write_component_code(const ComponentGroup &comp_gr auto &const_0 = writer->declare_tile("0", 0); // Load the LHS and RHS tiles - if(!lhs->has_tile()) + if (!lhs->has_tile()) { - auto sampler = create_boundary_aware_2d_sampler(writer, gid_0, gid_1, _lhs->dimension(0), _lhs->dimension(1), n0, m0, "lhs_", const_0); + auto sampler = create_boundary_aware_2d_sampler(writer, gid_0, gid_1, _lhs->dimension(0), _lhs->dimension(1), + n0, m0, "lhs_", const_0); sampler.format(TensorSamplerFormat::C_WH_1); // 3rd dimension collapsed with 2nd dimension sampler.z(const_0); sampler.b(gid_2); writer->op_load_once(lhs, sampler); } - if(!rhs->has_tile()) + if (!rhs->has_tile()) { - auto sampler = create_boundary_aware_2d_sampler(writer, gid_0, gid_1, _rhs->dimension(0), _rhs->dimension(1), n0, m0, "rhs_", const_0); + auto sampler = create_boundary_aware_2d_sampler(writer, gid_0, gid_1, _rhs->dimension(0), _rhs->dimension(1), + n0, m0, "rhs_", const_0); sampler.format(TensorSamplerFormat::C_WH_1); // 3rd dimension collapsed with 2nd dimension sampler.z(const_0); sampler.b(gid_2); writer->op_load_once(rhs, sampler); } - auto dst_sampler = create_boundary_aware_2d_sampler(writer, gid_0, gid_1, _dst->dimension(0), _dst->dimension(1), n0, m0, "dst_", const_0); + auto dst_sampler = create_boundary_aware_2d_sampler(writer, gid_0, gid_1, _dst->dimension(0), _dst->dimension(1), + n0, m0, "dst_", const_0); dst_sampler.format(TensorSamplerFormat::C_WH_1); // 3rd dimension collapsed with 2nd dimension dst_sampler.z(const_0); dst_sampler.b(gid_2); // Prepare the output tile. - if(!dst->has_tile()) + if (!dst->has_tile()) { - auto &tile = writer->declare_tile("dst_tile", ckw::TileInfo(to_ckw(_dst->data_type()), dst_sampler.height(), dst_sampler.width())); + auto &tile = writer->declare_tile( + "dst_tile", ckw::TileInfo(to_ckw(_dst->data_type()), dst_sampler.height(), dst_sampler.width())); dst->init_virtual_tensor(tile, dst_sampler); } @@ -131,9 +138,10 @@ Window GpuCkwElementwiseBinary::get_window() const // Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged // This is in line with the collapsing convention used by operators like Conv2d output_shape.collapse(2U, 1U); - constexpr unsigned int vector_size_byte_opencl = 16; - const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0)); - Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration)); + constexpr unsigned int vector_size_byte_opencl = 16; + const unsigned int num_elems_processed_per_iteration = + adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0)); + Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration)); return win; } @@ -141,11 +149,12 @@ Window GpuCkwElementwiseBinary::get_window() const std::string GpuCkwElementwiseBinary::get_name(const ComponentGroup &comp_group) const { ARM_COMPUTE_UNUSED(comp_group); - const std::vector<std::string> build_params = - { + const std::vector<std::string> build_params = { "elementwise_binary", - "op", to_string(_attributes.operation()), - "dt", lower_string(string_from_data_type(_dst->data_type())), + "op", + to_string(_attributes.operation()), + "dt", + lower_string(string_from_data_type(_dst->data_type())), }; return join(build_params, "_"); } @@ -154,13 +163,16 @@ std::string GpuCkwElementwiseBinary::get_tuner_id(const ComponentGroup &comp_gro { ARM_COMPUTE_UNUSED(comp_group); /// NOTE: Hardcoded for now, the parameters should ideally be exported by ckw (a selection of constant tiles) - std::vector<std::string> build_params = - { + std::vector<std::string> build_params = { "elementwise_binary", - "op", to_string(_attributes.operation()), - "dt", lower_string(string_from_data_type(_dst->data_type())), - "dst_dim0", support::cpp11::to_string(_dst->dimension(0)), - "dst_dim1", support::cpp11::to_string(_dst->dimension(1)), + "op", + to_string(_attributes.operation()), + "dt", + lower_string(string_from_data_type(_dst->data_type())), + "dst_dim0", + support::cpp11::to_string(_dst->dimension(0)), + "dst_dim1", + support::cpp11::to_string(_dst->dimension(1)), }; return join(build_params, "_"); } |