From 66f3d380cacb154748fdb2ac827da2377a2d1910 Mon Sep 17 00:00:00 2001 From: Adnan AlSinan Date: Mon, 10 Jul 2023 15:07:45 +0100 Subject: Port ClTemplateCast into Ckw Resolves COMPMID-6257 Signed-off-by: Adnan AlSinan Change-Id: I3e56ff1f1109924da02d0abd0354a3f1fa095ee7 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9914 Tested-by: Arm Jenkins Reviewed-by: Nikolaj Jensen Comments-Addressed: Arm Jenkins Benchmark: Arm Jenkins --- Android.bp | 1 + .../prototype/include/ckw/types/Operators.h | 3 + compute_kernel_writer/prototype/src/Prototype.h | 8 +- filelist.json | 1 + .../gpu/ckw_driver/components/GpuCkwCast.cpp | 177 +++++++++++++++++++++ .../sketch/gpu/ckw_driver/components/GpuCkwCast.h | 68 ++++++++ .../ckw_driver/components/utils/TypeConverter.h | 2 + .../sketch/gpu/components/cl/ClComponentCast.cpp | 11 +- .../sketch/gpu/components/cl/ClComponentCast.h | 6 +- 9 files changed, 272 insertions(+), 5 deletions(-) create mode 100644 src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp create mode 100644 src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h diff --git a/Android.bp b/Android.bp index c60dc04755..821d385141 100644 --- a/Android.bp +++ b/Android.bp @@ -639,6 +639,7 @@ cc_library_static { "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp", "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp", "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp", + "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp", "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp", "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp", "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp", diff --git a/compute_kernel_writer/prototype/include/ckw/types/Operators.h b/compute_kernel_writer/prototype/include/ckw/types/Operators.h index 78027f1ed5..172650d5ae 100644 --- a/compute_kernel_writer/prototype/include/ckw/types/Operators.h +++ b/compute_kernel_writer/prototype/include/ckw/types/Operators.h @@ -33,6 +33,7 @@ namespace ckw enum class UnaryOp : int32_t { LogicalNot = 0x0000, // ! + BitwiseNot = 0x0001, // ~ }; /* Binary operations @@ -60,6 +61,8 @@ enum class BinaryOp : int32_t // Logical LogicalAnd = 0x3000, // && LogicalOr = 0x3001, // || + // Bitwise + BitwiseXOR = 0x4000, // ^ }; enum class AssignmentOp : int32_t diff --git a/compute_kernel_writer/prototype/src/Prototype.h b/compute_kernel_writer/prototype/src/Prototype.h index b9f1efa542..72fa419fc2 100644 --- a/compute_kernel_writer/prototype/src/Prototype.h +++ b/compute_kernel_writer/prototype/src/Prototype.h @@ -1581,6 +1581,8 @@ inline std::string to_string(UnaryOp op) { case UnaryOp::LogicalNot: return "!"; + case UnaryOp::BitwiseNot: + return "~"; default: assert(false); return ""; @@ -1615,6 +1617,8 @@ inline std::string to_string(BinaryOp op) return "&&"; case BinaryOp::LogicalOr: return "||"; + case BinaryOp::BitwiseXOR: + return "^"; default: assert(false); return ""; @@ -3570,11 +3574,11 @@ public: OperandUnpacker operands(_data->tiles, _data->arguments); const IVectorTile *src = operands.unpack(o_src); const IVectorTile *dst = operands.unpack(o_dst); - // const int32_t dst_w = dst->format().w; const int32_t dst_h = dst->format().h; const std::string dt = dst->underlying_source_variables()[0].type.str; - const std::string sat = (policy == ConvertPolicy::Saturate ? "_sat" : ""); + const bool is_float = (dst->format().dt == DataType::Fp32) || (dst->format().dt == DataType::Fp16); + const std::string sat = ((policy == ConvertPolicy::Saturate && !is_float) ? "_sat" : ""); // Broadcasting on Y is automatic for(int32_t y = 0; y < dst_h; ++y) diff --git a/filelist.json b/filelist.json index 7870729ba0..304488ea36 100644 --- a/filelist.json +++ b/filelist.json @@ -2341,6 +2341,7 @@ "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp", "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp", "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp", + "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp", "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp", "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp", "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.cpp", diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp new file mode 100644 index 0000000000..96874aa820 --- /dev/null +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp @@ -0,0 +1,177 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "GpuCkwCast.h" + +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" +#include "arm_compute/core/Error.h" +#include "arm_compute/core/Validate.h" +#include "ckw/TensorTileSampler.h" +#include "src/core/helpers/WindowHelpers.h" +#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h" +#include + +using namespace ckw; +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ +namespace +{ +/** Create a simple sampler from tile of dimension [m0, n0] + */ +inline TensorTileSampler create_sampler(GpuCkwScopedKernelWriter &writer, int32_t m0, int32_t n0) +{ + TensorTileSampler sampler; + + auto &gid_0 = writer->declare_tile("gid_0", ckw::DataType::Int32); + auto &gid_1 = writer->declare_tile("gid_1", ckw::DataType::Int32); + auto &gid_2 = writer->declare_tile("gid_2", ckw::DataType::Int32); + + auto &const_0 = writer->declare_tile("0", 0); + writer->op_get_global_id(gid_0, 0); + writer->op_get_global_id(gid_1, 1); + writer->op_get_global_id(gid_2, 2); + + auto &x_coord = writer->declare_tile("x_coord", ckw::DataType::Int32); + auto &y_coord = writer->declare_tile("y_coord", ckw::DataType::Int32); + auto &m0_t = writer->declare_tile("m0", m0); + auto &n0_t = writer->declare_tile("n0", n0); + writer->op_binary_expression(x_coord, gid_0, BinaryOp::Mul, n0_t); + writer->op_binary_expression(y_coord, gid_1, BinaryOp::Mul, m0_t); + + sampler.x(x_coord); + sampler.y(y_coord); + sampler.z(const_0); // 3rd dimension collapsed with 2nd dimension + sampler.b(gid_2); + + sampler.width(n0); + sampler.height(m0); + + sampler.format(TensorSamplerFormat::C_WH_1); // 3rd dimension collapsed with 2nd dimension + sampler.address_mode_x(TensorSamplerAddressModeX::None); + sampler.address_mode_y(TensorSamplerAddressModeY::ClampToBorder); + sampler.address_mode_z(TensorSamplerAddressModeZ::Skip); // Dimensions higher than 3 not supported yet + + return sampler; +} +} // namespace + +GpuCkwCast::GpuCkwCast(ComponentId id, + const ArgumentPack &tensors, + const Attributes &attributes) + : IGpuCkwComponentDriver{ id, tensors }, + _src{}, + _dst{}, + _attributes{ attributes } +{ + _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); + _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); + ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst); +} + +void GpuCkwCast::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const +{ + const auto root_window = comp_group.get_root_component()->ckw_component_driver()->get_window(); + const unsigned int n0 = root_window.x().step(); + const unsigned int m0 = root_window.y().step(); + + GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, "src"); + GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst"); + + // Load the source tile and prepare the sampler. + if(!src->has_tile()) + { + const auto sampler = create_sampler(writer, m0, n0); + writer->op_load_once(src, sampler); + } + else + { + const auto &sampler = src->tile_sampler(); + writer->op_load_once(src, sampler); + } + + const auto &src_tile = src->tile(); + const auto &sampler = src->tile_sampler(); + + // Prepare the output tile. + if(!dst->has_tile()) + { + // Get Target datatype and convert it to ckw::DataType. + ckw::DataType target_dt = dynamic_fusion::to_ckw(_attributes.data_type()); + + // Create dst_tile based on src_tile dimensions and with target DataType. + const TileInfo src_tile_info = src_tile.tile_info(); + const TileInfo dst_tile_info = TileInfo(target_dt, src_tile_info.height(), src_tile_info.width()); + + // Declare dst_tile + auto &tile = writer->declare_tile("dst_tile", dst_tile_info); + dst->init_virtual_tensor(tile, sampler); + } + + const auto &dst_tile = dst->tile(); + + // Check if this op is cast-down or cast-up + const size_t src_size = data_size_from_type(_src->data_type()); + const size_t dst_size = data_size_from_type(_dst->data_type()); + const bool cast_down = (src_size >= dst_size); + + if(cast_down && is_data_type_quantized(_src->data_type())) + { + const auto &constant_x80 = writer->declare_tile("0x80", 0x80); + writer->op_binary_expression(src_tile, src_tile, BinaryOp::BitwiseXOR, constant_x80); + } + + ckw::ConvertPolicy convert_policy = ckw::ConvertPolicy::None; + + if(cast_down && (is_data_type_float(_src->data_type()) || _attributes.convert_policy() == ConvertPolicy::SATURATE)) + { + convert_policy = ckw::ConvertPolicy::Saturate; + } + + writer->op_cast_expression(dst_tile, src_tile, convert_policy); +} + +Window GpuCkwCast::get_window() const +{ + ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized"); + + TensorShape output_shape = _dst->tensor_shape(); + // Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged + // This is in line with the collapsing convention used by operators like Conv2d + output_shape.collapse(2U, 1U); + constexpr unsigned int vector_size_byte_opencl = 16; + const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0)); + Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration)); + + return win; +} + +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h new file mode 100644 index 0000000000..821cec1e19 --- /dev/null +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h @@ -0,0 +1,68 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWCAST +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWCAST + +#include "src/core/common/Macros.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h" +#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h" + +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ +class GpuCkwCast : public IGpuCkwComponentDriver +{ +public: + using Attributes = ClComponentCast::Attributes; + /** Constructor + * + * For supported configurations please refer to @ref ClComponentCast::validate() + * + * @param[in] id Component id + * @param[in] tensors Tensor arguments to the component + * @param[in] attributes Component attributes + */ + GpuCkwCast(ComponentId id, + const ArgumentPack &tensors, + const Attributes &attributes); + ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(GpuCkwCast); + /** Destructor */ + ~GpuCkwCast() override = default; + // Inherited methods overriden: + virtual void write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const override; + Window get_window() const override; + +private: + const ITensorInfo *_src; + const ITensorInfo *_dst; + Attributes _attributes; +}; +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute + +#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWCAST */ diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h index 2531fb7379..9027bddd76 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h @@ -48,12 +48,14 @@ inline ckw::DataType to_ckw(DataType dt) case DataType::S16: return ckw::DataType::Int16; case DataType::S8: + case DataType::QASYMM8_SIGNED: return ckw::DataType::Int8; case DataType::U32: return ckw::DataType::Uint32; case DataType::U16: return ckw::DataType::Uint16; case DataType::U8: + case DataType::QASYMM8: return ckw::DataType::Uint8; default: return ckw::DataType::Unknown; diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp index 007ba6380c..92933ae7a5 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,6 +26,7 @@ #include "arm_compute/core/Error.h" #include "src/core/CL/CLValidate.h" #include "src/dynamic_fusion/sketch/ArgumentPack.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h" #include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.h" namespace arm_compute @@ -66,7 +67,8 @@ ClComponentCast::ClComponentCast(ComponentId id, const Attributes &attributes, const Settings &settings) : IGpuKernelComponent{ id, properties, tensors }, - _component_writer{ std::make_unique(id, tensors, attributes) } + _component_writer{ std::make_unique(id, tensors, attributes) }, + _ckw_driver{ std::make_unique(id, tensors, attributes) } { ARM_COMPUTE_UNUSED(attributes, settings); } @@ -77,6 +79,11 @@ const IGpuTemplateComponentWriter *ClComponentCast::template_writer() const { return _component_writer.get(); } + +const IGpuCkwComponentDriver *ClComponentCast::ckw_component_driver() const +{ + return _ckw_driver.get(); +} } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h index 84d6f07f16..174f9670b3 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -49,6 +49,7 @@ private: /** Forward declaration */ class ClTemplateCast; +class GpuCkwCast; class ClComponentCast final : public IGpuKernelComponent { @@ -117,6 +118,8 @@ public: ClComponentCast &operator=(ClComponentCast &&component) = default; /** Get template writer for the component */ const IGpuTemplateComponentWriter *template_writer() const override; + /** Get GPU kernel writer for the component */ + const IGpuCkwComponentDriver *ckw_component_driver() const override; /** Get component type */ GpuComponentType type() const override { @@ -125,6 +128,7 @@ public: private: std::unique_ptr _component_writer; + std::unique_ptr _ckw_driver; }; } // namespace dynamic_fusion } // namespace experimental -- cgit v1.2.1