aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorAdnan AlSinan <adnan.alsinan@arm.com>2023-07-10 15:07:45 +0100
committerSiCong Li <sicong.li@arm.com>2023-07-14 15:06:16 +0000
commit66f3d380cacb154748fdb2ac827da2377a2d1910 (patch)
treef7156ab390cd6e8a7d15b4403cca04542f1da1d5
parent4184e86508c3b1a744e9687d1112ba5f65f55eeb (diff)
downloadComputeLibrary-66f3d380cacb154748fdb2ac827da2377a2d1910.tar.gz
Port ClTemplateCast into Ckw
Resolves COMPMID-6257 Signed-off-by: Adnan AlSinan <adnan.alsinan@arm.com> Change-Id: I3e56ff1f1109924da02d0abd0354a3f1fa095ee7 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9914 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Nikolaj Jensen <nikolaj.jensen@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--Android.bp1
-rw-r--r--compute_kernel_writer/prototype/include/ckw/types/Operators.h3
-rw-r--r--compute_kernel_writer/prototype/src/Prototype.h8
-rw-r--r--filelist.json1
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp177
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h68
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h2
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp11
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h6
9 files changed, 272 insertions, 5 deletions
diff --git a/Android.bp b/Android.bp
index c60dc04755..821d385141 100644
--- a/Android.bp
+++ b/Android.bp
@@ -639,6 +639,7 @@ cc_library_static {
"src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp",
"src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp",
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp",
+ "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp",
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp",
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp",
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp",
diff --git a/compute_kernel_writer/prototype/include/ckw/types/Operators.h b/compute_kernel_writer/prototype/include/ckw/types/Operators.h
index 78027f1ed5..172650d5ae 100644
--- a/compute_kernel_writer/prototype/include/ckw/types/Operators.h
+++ b/compute_kernel_writer/prototype/include/ckw/types/Operators.h
@@ -33,6 +33,7 @@ namespace ckw
enum class UnaryOp : int32_t
{
LogicalNot = 0x0000, // !
+ BitwiseNot = 0x0001, // ~
};
/* Binary operations
@@ -60,6 +61,8 @@ enum class BinaryOp : int32_t
// Logical
LogicalAnd = 0x3000, // &&
LogicalOr = 0x3001, // ||
+ // Bitwise
+ BitwiseXOR = 0x4000, // ^
};
enum class AssignmentOp : int32_t
diff --git a/compute_kernel_writer/prototype/src/Prototype.h b/compute_kernel_writer/prototype/src/Prototype.h
index b9f1efa542..72fa419fc2 100644
--- a/compute_kernel_writer/prototype/src/Prototype.h
+++ b/compute_kernel_writer/prototype/src/Prototype.h
@@ -1581,6 +1581,8 @@ inline std::string to_string(UnaryOp op)
{
case UnaryOp::LogicalNot:
return "!";
+ case UnaryOp::BitwiseNot:
+ return "~";
default:
assert(false);
return "";
@@ -1615,6 +1617,8 @@ inline std::string to_string(BinaryOp op)
return "&&";
case BinaryOp::LogicalOr:
return "||";
+ case BinaryOp::BitwiseXOR:
+ return "^";
default:
assert(false);
return "";
@@ -3570,11 +3574,11 @@ public:
OperandUnpacker operands(_data->tiles, _data->arguments);
const IVectorTile *src = operands.unpack(o_src);
const IVectorTile *dst = operands.unpack(o_dst);
-
// const int32_t dst_w = dst->format().w;
const int32_t dst_h = dst->format().h;
const std::string dt = dst->underlying_source_variables()[0].type.str;
- const std::string sat = (policy == ConvertPolicy::Saturate ? "_sat" : "");
+ const bool is_float = (dst->format().dt == DataType::Fp32) || (dst->format().dt == DataType::Fp16);
+ const std::string sat = ((policy == ConvertPolicy::Saturate && !is_float) ? "_sat" : "");
// Broadcasting on Y is automatic
for(int32_t y = 0; y < dst_h; ++y)
diff --git a/filelist.json b/filelist.json
index 7870729ba0..304488ea36 100644
--- a/filelist.json
+++ b/filelist.json
@@ -2341,6 +2341,7 @@
"src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp",
"src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp",
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp",
+ "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp",
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp",
"src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp",
"src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.cpp",
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp
new file mode 100644
index 0000000000..96874aa820
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp
@@ -0,0 +1,177 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "GpuCkwCast.h"
+
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
+#include "arm_compute/core/Error.h"
+#include "arm_compute/core/Validate.h"
+#include "ckw/TensorTileSampler.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h"
+#include <string>
+
+using namespace ckw;
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+namespace
+{
+/** Create a simple sampler from tile of dimension [m0, n0]
+ */
+inline TensorTileSampler create_sampler(GpuCkwScopedKernelWriter &writer, int32_t m0, int32_t n0)
+{
+ TensorTileSampler sampler;
+
+ auto &gid_0 = writer->declare_tile("gid_0", ckw::DataType::Int32);
+ auto &gid_1 = writer->declare_tile("gid_1", ckw::DataType::Int32);
+ auto &gid_2 = writer->declare_tile("gid_2", ckw::DataType::Int32);
+
+ auto &const_0 = writer->declare_tile("0", 0);
+ writer->op_get_global_id(gid_0, 0);
+ writer->op_get_global_id(gid_1, 1);
+ writer->op_get_global_id(gid_2, 2);
+
+ auto &x_coord = writer->declare_tile("x_coord", ckw::DataType::Int32);
+ auto &y_coord = writer->declare_tile("y_coord", ckw::DataType::Int32);
+ auto &m0_t = writer->declare_tile("m0", m0);
+ auto &n0_t = writer->declare_tile("n0", n0);
+ writer->op_binary_expression(x_coord, gid_0, BinaryOp::Mul, n0_t);
+ writer->op_binary_expression(y_coord, gid_1, BinaryOp::Mul, m0_t);
+
+ sampler.x(x_coord);
+ sampler.y(y_coord);
+ sampler.z(const_0); // 3rd dimension collapsed with 2nd dimension
+ sampler.b(gid_2);
+
+ sampler.width(n0);
+ sampler.height(m0);
+
+ sampler.format(TensorSamplerFormat::C_WH_1); // 3rd dimension collapsed with 2nd dimension
+ sampler.address_mode_x(TensorSamplerAddressModeX::None);
+ sampler.address_mode_y(TensorSamplerAddressModeY::ClampToBorder);
+ sampler.address_mode_z(TensorSamplerAddressModeZ::Skip); // Dimensions higher than 3 not supported yet
+
+ return sampler;
+}
+} // namespace
+
+GpuCkwCast::GpuCkwCast(ComponentId id,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes)
+ : IGpuCkwComponentDriver{ id, tensors },
+ _src{},
+ _dst{},
+ _attributes{ attributes }
+{
+ _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
+ _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
+ ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
+}
+
+void GpuCkwCast::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
+{
+ const auto root_window = comp_group.get_root_component()->ckw_component_driver()->get_window();
+ const unsigned int n0 = root_window.x().step();
+ const unsigned int m0 = root_window.y().step();
+
+ GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, "src");
+ GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst");
+
+ // Load the source tile and prepare the sampler.
+ if(!src->has_tile())
+ {
+ const auto sampler = create_sampler(writer, m0, n0);
+ writer->op_load_once(src, sampler);
+ }
+ else
+ {
+ const auto &sampler = src->tile_sampler();
+ writer->op_load_once(src, sampler);
+ }
+
+ const auto &src_tile = src->tile();
+ const auto &sampler = src->tile_sampler();
+
+ // Prepare the output tile.
+ if(!dst->has_tile())
+ {
+ // Get Target datatype and convert it to ckw::DataType.
+ ckw::DataType target_dt = dynamic_fusion::to_ckw(_attributes.data_type());
+
+ // Create dst_tile based on src_tile dimensions and with target DataType.
+ const TileInfo src_tile_info = src_tile.tile_info();
+ const TileInfo dst_tile_info = TileInfo(target_dt, src_tile_info.height(), src_tile_info.width());
+
+ // Declare dst_tile
+ auto &tile = writer->declare_tile("dst_tile", dst_tile_info);
+ dst->init_virtual_tensor(tile, sampler);
+ }
+
+ const auto &dst_tile = dst->tile();
+
+ // Check if this op is cast-down or cast-up
+ const size_t src_size = data_size_from_type(_src->data_type());
+ const size_t dst_size = data_size_from_type(_dst->data_type());
+ const bool cast_down = (src_size >= dst_size);
+
+ if(cast_down && is_data_type_quantized(_src->data_type()))
+ {
+ const auto &constant_x80 = writer->declare_tile("0x80", 0x80);
+ writer->op_binary_expression(src_tile, src_tile, BinaryOp::BitwiseXOR, constant_x80);
+ }
+
+ ckw::ConvertPolicy convert_policy = ckw::ConvertPolicy::None;
+
+ if(cast_down && (is_data_type_float(_src->data_type()) || _attributes.convert_policy() == ConvertPolicy::SATURATE))
+ {
+ convert_policy = ckw::ConvertPolicy::Saturate;
+ }
+
+ writer->op_cast_expression(dst_tile, src_tile, convert_policy);
+}
+
+Window GpuCkwCast::get_window() const
+{
+ ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
+
+ TensorShape output_shape = _dst->tensor_shape();
+ // Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged
+ // This is in line with the collapsing convention used by operators like Conv2d
+ output_shape.collapse(2U, 1U);
+ constexpr unsigned int vector_size_byte_opencl = 16;
+ const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
+ Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
+
+ return win;
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h
new file mode 100644
index 0000000000..821cec1e19
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h
@@ -0,0 +1,68 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWCAST
+#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWCAST
+
+#include "src/core/common/Macros.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+class GpuCkwCast : public IGpuCkwComponentDriver
+{
+public:
+ using Attributes = ClComponentCast::Attributes;
+ /** Constructor
+ *
+ * For supported configurations please refer to @ref ClComponentCast::validate()
+ *
+ * @param[in] id Component id
+ * @param[in] tensors Tensor arguments to the component
+ * @param[in] attributes Component attributes
+ */
+ GpuCkwCast(ComponentId id,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes);
+ ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(GpuCkwCast);
+ /** Destructor */
+ ~GpuCkwCast() override = default;
+ // Inherited methods overriden:
+ virtual void write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const override;
+ Window get_window() const override;
+
+private:
+ const ITensorInfo *_src;
+ const ITensorInfo *_dst;
+ Attributes _attributes;
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+
+#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWCAST */
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h
index 2531fb7379..9027bddd76 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h
@@ -48,12 +48,14 @@ inline ckw::DataType to_ckw(DataType dt)
case DataType::S16:
return ckw::DataType::Int16;
case DataType::S8:
+ case DataType::QASYMM8_SIGNED:
return ckw::DataType::Int8;
case DataType::U32:
return ckw::DataType::Uint32;
case DataType::U16:
return ckw::DataType::Uint16;
case DataType::U8:
+ case DataType::QASYMM8:
return ckw::DataType::Uint8;
default:
return ckw::DataType::Unknown;
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp
index 007ba6380c..92933ae7a5 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -26,6 +26,7 @@
#include "arm_compute/core/Error.h"
#include "src/core/CL/CLValidate.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.h"
namespace arm_compute
@@ -66,7 +67,8 @@ ClComponentCast::ClComponentCast(ComponentId id,
const Attributes &attributes,
const Settings &settings)
: IGpuKernelComponent{ id, properties, tensors },
- _component_writer{ std::make_unique<ClTemplateCast>(id, tensors, attributes) }
+ _component_writer{ std::make_unique<ClTemplateCast>(id, tensors, attributes) },
+ _ckw_driver{ std::make_unique<GpuCkwCast>(id, tensors, attributes) }
{
ARM_COMPUTE_UNUSED(attributes, settings);
}
@@ -77,6 +79,11 @@ const IGpuTemplateComponentWriter *ClComponentCast::template_writer() const
{
return _component_writer.get();
}
+
+const IGpuCkwComponentDriver *ClComponentCast::ckw_component_driver() const
+{
+ return _ckw_driver.get();
+}
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h
index 84d6f07f16..174f9670b3 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2022 Arm Limited.
+ * Copyright (c) 2022-2023 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -49,6 +49,7 @@ private:
/** Forward declaration */
class ClTemplateCast;
+class GpuCkwCast;
class ClComponentCast final : public IGpuKernelComponent
{
@@ -117,6 +118,8 @@ public:
ClComponentCast &operator=(ClComponentCast &&component) = default;
/** Get template writer for the component */
const IGpuTemplateComponentWriter *template_writer() const override;
+ /** Get GPU kernel writer for the component */
+ const IGpuCkwComponentDriver *ckw_component_driver() const override;
/** Get component type */
GpuComponentType type() const override
{
@@ -125,6 +128,7 @@ public:
private:
std::unique_ptr<ClTemplateCast> _component_writer;
+ std::unique_ptr<GpuCkwCast> _ckw_driver;
};
} // namespace dynamic_fusion
} // namespace experimental