diff options
author | Felix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com> | 2023-09-27 17:46:17 +0100 |
---|---|---|
committer | felixjohnny.thomasmathibalan <felixjohnny.thomasmathibalan@arm.com> | 2023-09-28 12:08:05 +0000 |
commit | afd38f0c617d6f89b2b4532c6c44f116617e2b6f (patch) | |
tree | 03bc7d5a762099989b16a656fa8d397b490ed70e /src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp | |
parent | bdcb4c148ee2fdeaaddf4cf1e57bbb0de02bb894 (diff) | |
download | ComputeLibrary-afd38f0c617d6f89b2b4532c6c44f116617e2b6f.tar.gz |
Apply clang-format on repository
Code is formatted as per a revised clang format configuration
file(not part of this delivery). Version 14.0.6 is used.
Exclusion List:
- files with .cl extension
- files that are not strictly C/C++ (e.g. Android.bp, Sconscript ...)
And the following directories
- compute_kernel_writer/validation/
- tests/
- include/
- src/core/NEON/kernels/convolution/
- src/core/NEON/kernels/arm_gemm/
- src/core/NEON/kernels/arm_conv/
- data/
There will be a follow up for formatting of .cl files and the
files under tests/ and compute_kernel_writer/validation/.
Signed-off-by: Felix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com>
Change-Id: Ib7eb1fcf4e7537b9feaefcfc15098a804a3fde0a
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10391
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp')
-rw-r--r-- | src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp | 84 |
1 files changed, 48 insertions, 36 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp index c8bf999261..2935ba45ea 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp @@ -24,22 +24,24 @@ #include "GpuCkwElementwiseBinary.h" #include "arm_compute/core/Error.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/core/utils/StringUtils.h" #include "arm_compute/core/utils/helpers/AdjustVecSize.h" +#include "arm_compute/core/utils/StringUtils.h" +#include "arm_compute/core/Validate.h" #include "ckw/TensorTileSampler.h" #include "ckw/types/TensorSamplerTypes.h" + #include "src/core/helpers/WindowHelpers.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/ElementwiseBinary.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h" -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h" -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h" -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/ElementwiseBinary.h" #include "src/dynamic_fusion/sketch/gpu/components/utils/type_printer/ElementwiseBinary.h" +#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" +#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" #include "support/StringSupport.h" + #include <algorithm> #include <string> @@ -53,11 +55,7 @@ namespace dynamic_fusion GpuCkwElementwiseBinary::GpuCkwElementwiseBinary(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes) - : IGpuCkwComponentDriver{ id, tensors }, - _lhs{}, - _rhs{}, - _dst{}, - _attributes{ attributes } + : IGpuCkwComponentDriver{id, tensors}, _lhs{}, _rhs{}, _dst{}, _attributes{attributes} { _lhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); _rhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_1); @@ -65,15 +63,20 @@ GpuCkwElementwiseBinary::GpuCkwElementwiseBinary(ComponentId ARM_COMPUTE_ERROR_ON_NULLPTR(_lhs, _rhs, _dst); } -void GpuCkwElementwiseBinary::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const +void GpuCkwElementwiseBinary::write_component_code(const ComponentGroup &comp_group, + GpuCkwVariableTable &vtable, + GpuCkwScopedKernelWriter writer) const { const auto root_window = comp_group.get_root_component()->ckw_component_driver()->get_window(); const auto n0 = static_cast<int32_t>(root_window.x().step()); const auto m0 = static_cast<int32_t>(root_window.y().step()); - GpuCkwComponentArgument *lhs = vtable.declare_variable(comp_group, writer, _lhs, TensorStorageType::ClBufferUint8Ptr, "lhs"); - GpuCkwComponentArgument *rhs = vtable.declare_variable(comp_group, writer, _rhs, TensorStorageType::ClBufferUint8Ptr, "rhs"); - GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst"); + GpuCkwComponentArgument *lhs = + vtable.declare_variable(comp_group, writer, _lhs, TensorStorageType::ClBufferUint8Ptr, "lhs"); + GpuCkwComponentArgument *rhs = + vtable.declare_variable(comp_group, writer, _rhs, TensorStorageType::ClBufferUint8Ptr, "rhs"); + GpuCkwComponentArgument *dst = + vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst"); auto &gid_0 = writer->declare_tile("gid_0", ckw::DataType::Int32); auto &gid_1 = writer->declare_tile("gid_1", ckw::DataType::Int32); @@ -86,32 +89,36 @@ void GpuCkwElementwiseBinary::write_component_code(const ComponentGroup &comp_gr auto &const_0 = writer->declare_tile("0", 0); // Load the LHS and RHS tiles - if(!lhs->has_tile()) + if (!lhs->has_tile()) { - auto sampler = create_boundary_aware_2d_sampler(writer, gid_0, gid_1, _lhs->dimension(0), _lhs->dimension(1), n0, m0, "lhs_", const_0); + auto sampler = create_boundary_aware_2d_sampler(writer, gid_0, gid_1, _lhs->dimension(0), _lhs->dimension(1), + n0, m0, "lhs_", const_0); sampler.format(TensorSamplerFormat::C_WH_1); // 3rd dimension collapsed with 2nd dimension sampler.z(const_0); sampler.b(gid_2); writer->op_load_once(lhs, sampler); } - if(!rhs->has_tile()) + if (!rhs->has_tile()) { - auto sampler = create_boundary_aware_2d_sampler(writer, gid_0, gid_1, _rhs->dimension(0), _rhs->dimension(1), n0, m0, "rhs_", const_0); + auto sampler = create_boundary_aware_2d_sampler(writer, gid_0, gid_1, _rhs->dimension(0), _rhs->dimension(1), + n0, m0, "rhs_", const_0); sampler.format(TensorSamplerFormat::C_WH_1); // 3rd dimension collapsed with 2nd dimension sampler.z(const_0); sampler.b(gid_2); writer->op_load_once(rhs, sampler); } - auto dst_sampler = create_boundary_aware_2d_sampler(writer, gid_0, gid_1, _dst->dimension(0), _dst->dimension(1), n0, m0, "dst_", const_0); + auto dst_sampler = create_boundary_aware_2d_sampler(writer, gid_0, gid_1, _dst->dimension(0), _dst->dimension(1), + n0, m0, "dst_", const_0); dst_sampler.format(TensorSamplerFormat::C_WH_1); // 3rd dimension collapsed with 2nd dimension dst_sampler.z(const_0); dst_sampler.b(gid_2); // Prepare the output tile. - if(!dst->has_tile()) + if (!dst->has_tile()) { - auto &tile = writer->declare_tile("dst_tile", ckw::TileInfo(to_ckw(_dst->data_type()), dst_sampler.height(), dst_sampler.width())); + auto &tile = writer->declare_tile( + "dst_tile", ckw::TileInfo(to_ckw(_dst->data_type()), dst_sampler.height(), dst_sampler.width())); dst->init_virtual_tensor(tile, dst_sampler); } @@ -131,9 +138,10 @@ Window GpuCkwElementwiseBinary::get_window() const // Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged // This is in line with the collapsing convention used by operators like Conv2d output_shape.collapse(2U, 1U); - constexpr unsigned int vector_size_byte_opencl = 16; - const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0)); - Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration)); + constexpr unsigned int vector_size_byte_opencl = 16; + const unsigned int num_elems_processed_per_iteration = + adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0)); + Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration)); return win; } @@ -141,11 +149,12 @@ Window GpuCkwElementwiseBinary::get_window() const std::string GpuCkwElementwiseBinary::get_name(const ComponentGroup &comp_group) const { ARM_COMPUTE_UNUSED(comp_group); - const std::vector<std::string> build_params = - { + const std::vector<std::string> build_params = { "elementwise_binary", - "op", to_string(_attributes.operation()), - "dt", lower_string(string_from_data_type(_dst->data_type())), + "op", + to_string(_attributes.operation()), + "dt", + lower_string(string_from_data_type(_dst->data_type())), }; return join(build_params, "_"); } @@ -154,13 +163,16 @@ std::string GpuCkwElementwiseBinary::get_tuner_id(const ComponentGroup &comp_gro { ARM_COMPUTE_UNUSED(comp_group); /// NOTE: Hardcoded for now, the parameters should ideally be exported by ckw (a selection of constant tiles) - std::vector<std::string> build_params = - { + std::vector<std::string> build_params = { "elementwise_binary", - "op", to_string(_attributes.operation()), - "dt", lower_string(string_from_data_type(_dst->data_type())), - "dst_dim0", support::cpp11::to_string(_dst->dimension(0)), - "dst_dim1", support::cpp11::to_string(_dst->dimension(1)), + "op", + to_string(_attributes.operation()), + "dt", + lower_string(string_from_data_type(_dst->data_type())), + "dst_dim0", + support::cpp11::to_string(_dst->dimension(0)), + "dst_dim1", + support::cpp11::to_string(_dst->dimension(1)), }; return join(build_params, "_"); } |