From 19844f605f5e5b71d05164711dee13f8652adafe Mon Sep 17 00:00:00 2001 From: SiCong Li Date: Tue, 16 May 2023 16:46:34 +0100 Subject: Add Kernel Writer driver code to dynamic fusion * Partially port ElementwiseBinary component to ckw (broadcast not supported yet) * Port Store component to ckw * Move KernelArgumentsHelpers to ckw_driver/ as it's only used by the driver ckw_driver is a middle layer between dynamic fusion and Compute Kernel Writer (CKW). It consumes the fused kernel component stream produced by Dynamic Fusion and uses CKW to write the kernel code complete with all meta info needed by the runtime to enqueue the kernel. It consists of two parts: * Kernel writing: This resides in dynamic_fusion/sketch * Runtime utilities: This resides in dynamic_fusion/runtime The integration (separation between DF and CKW) occurs in two places: * Inside GpuCKWDriver global driver that coordinates how the final fused kernel code is assembled together alongwith other meta info needed by runtime. * Inside each instantiated IGpuCKWComponentDriver component driver that drives CKW to write component-specific code or do component-specific configurations Partially resolves: COMPMID-5792 COMPMID-6282 COMPMID-6260 COMPMID-6266 Signed-off-by: SiCong Li Change-Id: Ib57a080a65fe8cfee1a8df1529fe572005a6d2f2 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9847 Reviewed-by: Gunes Bayir Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins --- .../components/GpuCkwElementwiseBinary.cpp | 143 +++++++++++++++++++++ .../components/GpuCkwElementwiseBinary.h | 69 ++++++++++ .../gpu/ckw_driver/components/GpuCkwStore.cpp | 57 ++++++++ .../sketch/gpu/ckw_driver/components/GpuCkwStore.h | 61 +++++++++ .../ckw_driver/components/utils/TypeConverter.h | 102 +++++++++++++++ .../gpu/ckw_driver/components/utils/WriterHelper.h | 68 ++++++++++ 6 files changed, 500 insertions(+) create mode 100644 src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp create mode 100644 src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h create mode 100644 src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp create mode 100644 src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h create mode 100644 src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h create mode 100644 src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h (limited to 'src/dynamic_fusion/sketch/gpu/ckw_driver/components') diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp new file mode 100644 index 0000000000..9895bbeb77 --- /dev/null +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp @@ -0,0 +1,143 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "GpuCkwElementwiseBinary.h" + +#include "acl/AclKernelWriter.h" +#include "acl/AclScopedKernelWriter.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Validate.h" +#include "ckw/TensorTileSampler.h" +#include "ckw/Types.h" +#include "src/core/helpers/WindowHelpers.h" +#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h" +#include + +using namespace ckw; +namespace arm_compute +{ +namespace experimental +{ +namespace +{ +/** Create a simple sampler from tile of dimension [m0, n0] + */ +inline TensorTileSampler create_simple_sampler(AclScopedKernelWriter &writer, int32_t m0, int32_t n0) +{ + TensorTileSampler sampler; + + auto &gid_0 = writer->declare_tile("gid_0", ckw::DataType::Int32); + auto &gid_1 = writer->declare_tile("gid_1", ckw::DataType::Int32); + auto &gid_2 = writer->declare_tile("gid_2", ckw::DataType::Int32); + + auto &const_0 = writer->declare_tile("0", 0); + + writer->op_get_global_id(gid_0, 0); + writer->op_get_global_id(gid_1, 1); + writer->op_get_global_id(gid_2, 2); + + sampler.x(gid_0); + sampler.y(gid_1); + sampler.z(const_0); // 3rd dimension collapsed with 2nd dimension + sampler.b(gid_2); + + sampler.width(n0); + sampler.height(m0); + + sampler.format(TensorSamplerFormat::C_WH_1); // 3rd dimension collapsed with 2nd dimension + sampler.address_mode_x(TensorSamplerAddressModeX::None); + sampler.address_mode_y(TensorSamplerAddressModeY::ClampToBorder); + sampler.address_mode_z(TensorSamplerAddressModeZ::Skip); // Dimensions higher than 3 not supported yet + + return sampler; +} +} // namespace + +namespace dynamic_fusion +{ +GpuCkwElementwiseBinary::GpuCkwElementwiseBinary(ComponentId id, + const ArgumentPack &tensors, + const Attributes &attributes) + : IGpuCkwComponentDriver{ id, tensors }, + _lhs{}, + _rhs{}, + _dst{}, + _attributes{ attributes } +{ + _lhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); + _rhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_1); + _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); + ARM_COMPUTE_ERROR_ON_NULLPTR(_lhs, _rhs, _dst); +} + +void GpuCkwElementwiseBinary::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, AclScopedKernelWriter writer) const +{ + const auto root_window = comp_group.get_root_component()->ckw_component_driver()->get_window(); + const unsigned int n0 = root_window.x().step(); + const unsigned int m0 = root_window.y().step(); + + AclComponentArgument *lhs = vtable.declare_variable(comp_group, writer, _lhs, "lhs"); + AclComponentArgument *rhs = vtable.declare_variable(comp_group, writer, _rhs, "rhs"); + AclComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst"); + + // Load the LHS and RHS tiles and prepare the tensor sampler. + load_lhs_rhs_tiles_and_prepare_sampler(writer, lhs, rhs, m0, n0, create_simple_sampler); + + auto &lhs_tile = lhs->tile(); + auto &rhs_tile = rhs->tile(); + const auto &sampler = lhs->tile_sampler(); + + // Prepare the output tile. + if(!dst->has_tile()) + { + auto &tile = writer->declare_tile("dst_tile", lhs_tile.tile_info()); + dst->init_virtual_tensor(tile, sampler); + } + + auto &dst_tile = dst->tile(); + + // Perform the operation. + writer->op_binary_expression(dst_tile, lhs_tile, rhs_tile, BinaryOp::Add); +} + +Window GpuCkwElementwiseBinary::get_window() const +{ + ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized"); + + TensorShape output_shape = _dst->tensor_shape(); + // Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged + // This is in line with the collapsing convention used by operators like Conv2d + output_shape.collapse(2U, 1U); + // constexpr unsigned int vector_size_byte_opencl = 16; + // const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0)); + const unsigned int num_elems_processed_per_iteration = 1U; // Hard-coded for now + Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration)); + + return win; +} + +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h new file mode 100644 index 0000000000..1a79754d1d --- /dev/null +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWELEMENTWISEBINARY +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWELEMENTWISEBINARY + +#include "src/core/common/Macros.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h" +#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h" + +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ +class GpuCkwElementwiseBinary : public IGpuCkwComponentDriver +{ +public: + using Attributes = ClComponentElementwiseBinary::Attributes; + /** Constructor + * + * For supported configurations please refer to @ref ClComponentElementwiseBinary::validate() + * + * @param[in] id Component id + * @param[in] tensors Tensor arguments to the component + * @param[in] attributes Component attributes + */ + GpuCkwElementwiseBinary(ComponentId id, + const ArgumentPack &tensors, + const Attributes &attributes); + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(GpuCkwElementwiseBinary); + /** Destructor */ + ~GpuCkwElementwiseBinary() override = default; + // Inherited methods overriden: + virtual void write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, AclScopedKernelWriter writer) const override; + Window get_window() const override; + +private: + const ITensorInfo *_lhs; + const ITensorInfo *_rhs; + const ITensorInfo *_dst; + Attributes _attributes; +}; +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute + +#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWELEMENTWISEBINARY */ diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp new file mode 100644 index 0000000000..1a1dfc135a --- /dev/null +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp @@ -0,0 +1,57 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "GpuCkwStore.h" + +#include "arm_compute/core/Error.h" +#include "compute_kernel_writer/include/acl/AclKernelWriter.h" +#include "compute_kernel_writer/include/acl/AclScopedKernelWriter.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h" +#include + +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ +GpuCkwStore::GpuCkwStore(ComponentId id, const ArgumentPack &tensors) + : IGpuCkwComponentDriver{ id, tensors }, _src{}, _dst{} +{ + _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); + _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); +} +void GpuCkwStore::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, AclScopedKernelWriter writer) const +{ + auto src = vtable.declare_variable(comp_group, writer, _src, "src"); + auto dst = vtable.declare_variable(comp_group, writer, _dst, "dst"); + + auto &src_tile = src->tile(); + const auto &sampler = src->tile_sampler(); + auto &dst_tensor = dst->tensor(); + + writer->op_store(dst_tensor, src_tile, sampler); +} +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h new file mode 100644 index 0000000000..45cc43fe62 --- /dev/null +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWSTORE +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWSTORE + +#include "src/core/common/Macros.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h" + +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ +/** An interface used by @ref ClTemplateWriter to write source code for a kernel component + */ +class GpuCkwStore : public IGpuCkwComponentDriver +{ +public: + /** Constructor + * + * @param[in] id Component id + * @param[in] tensors Tensor arguments to the component + */ + GpuCkwStore(ComponentId id, const ArgumentPack &tensors); + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(GpuCkwStore); + /** Destructor */ + ~GpuCkwStore() override = default; + // Inherited methods overriden: + virtual void write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, AclScopedKernelWriter writer) const override; + +private: + const ITensorInfo *_src; + const ITensorInfo *_dst; +}; +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute + +#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWSTORE */ diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h new file mode 100644 index 0000000000..2531fb7379 --- /dev/null +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h @@ -0,0 +1,102 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_UTILS_TYPECONVERTER +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_UTILS_TYPECONVERTER + +#include "arm_compute/core/ITensorInfo.h" +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "ckw/TensorInfo.h" + +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ +inline ckw::DataType to_ckw(DataType dt) +{ + switch(dt) + { + case DataType::F32: + return ckw::DataType::Fp32; + case DataType::F16: + return ckw::DataType::Fp16; + case DataType::S32: + return ckw::DataType::Int32; + case DataType::S16: + return ckw::DataType::Int16; + case DataType::S8: + return ckw::DataType::Int8; + case DataType::U32: + return ckw::DataType::Uint32; + case DataType::U16: + return ckw::DataType::Uint16; + case DataType::U8: + return ckw::DataType::Uint8; + default: + return ckw::DataType::Unknown; + } +} + +inline ckw::TensorShape to_ckw(const TensorShape &shape) +{ + ARM_COMPUTE_ERROR_ON(shape.num_max_dimensions < std::tuple_size {}); + ARM_COMPUTE_ERROR_ON(std::tuple_size {} != 5); + /// NOTE: Overflow danger. Use size_t? + return ckw::TensorShape + { + static_cast(shape[0]), + static_cast(shape[1]), + static_cast(shape[2]), + static_cast(shape[3]), + static_cast(shape[4]) + }; +} +inline ckw::TensorDataLayout to_ckw(DataLayout dl) +{ + switch(dl) + { + case DataLayout::NHWC: + return ckw::TensorDataLayout::Nhwc; + case DataLayout::NDHWC: + return ckw::TensorDataLayout::Ndhwc; + default: + return ckw::TensorDataLayout::Unknown; + } +} +inline ckw::TensorInfo to_ckw(const ITensorInfo &tensor_info) +{ + return ckw::TensorInfo + { + to_ckw(tensor_info.data_type()), + to_ckw(tensor_info.tensor_shape()), + to_ckw(tensor_info.data_layout()), + tensor_info.id() + }; +} +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute +#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_UTILS_TYPECONVERTER */ diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h new file mode 100644 index 0000000000..d94ebd5ce9 --- /dev/null +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_UTILS_WRITERHELPER +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_UTILS_WRITERHELPER + +#include "acl/AclComponentArgument.h" +#include "acl/AclScopedKernelWriter.h" +#include "ckw/TensorTileSampler.h" + +#include + +using namespace ckw; +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ +using SamplerCreator = std::function; + +/** Load lhs and rhs tiles of dimension [m0, n0] only when not loaded and prepare the sampler + */ +inline void load_lhs_rhs_tiles_and_prepare_sampler(AclScopedKernelWriter &writer, AclComponentArgument *lhs, AclComponentArgument *rhs, int32_t m0, int32_t n0, SamplerCreator create_sampler) +{ + if(!lhs->has_tile() && !rhs->has_tile()) + { + const auto sampler = create_sampler(writer, m0, n0); + + writer->op_load_once(lhs, sampler); + writer->op_load_once(rhs, sampler); + } + else if(lhs->has_tile()) + { + const auto &sampler = lhs->tile_sampler(); + writer->op_load_once(rhs, sampler); + } + else + { + const auto &sampler = rhs->tile_sampler(); + writer->op_load_once(lhs, sampler); + } +} + +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute +#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_UTILS_WRITERHELPER */ -- cgit v1.2.1