aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorViet-Hoa Do <viet-hoa.do@arm.com>2023-05-30 09:34:32 +0100
committerViet-Hoa Do <viet-hoa.do@arm.com>2023-06-27 09:42:51 +0000
commitbd4f6b9ed37ed7a222e36ce6823ba96396f60deb (patch)
treed1117a182d2d5fe8d2cef1ed631e3723b2aca354
parent8c49f16e5909a9bd5dc6e68638d2e2d8acc2fc66 (diff)
downloadComputeLibrary-bd4f6b9ed37ed7a222e36ce6823ba96396f60deb.tar.gz
Compute kernel writer API and prototype
* Add the public API for compute kernel writer. * Use the prototype as the implementation of the public API. Resolves: COMPMID-5790 Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com> Change-Id: I9d80e15325e1d953feb87c1f2eb61a587bb9ab5e Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9814 Reviewed-by: Jakub Sujak <jakub.sujak@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--Android.bp9
-rw-r--r--compute_kernel_writer/.clang-format47
-rw-r--r--compute_kernel_writer/CMakeLists.txt28
-rw-r--r--compute_kernel_writer/README.md1
-rw-r--r--compute_kernel_writer/examples/add_exp_store.cpp181
-rw-r--r--compute_kernel_writer/include/acl/AclComponentArgument.h111
-rw-r--r--compute_kernel_writer/include/acl/AclKernelWriter.h56
-rw-r--r--compute_kernel_writer/include/acl/AclScopedKernelWriter.h62
-rw-r--r--compute_kernel_writer/include/ckw/Error.h66
-rw-r--r--compute_kernel_writer/include/ckw/Kernel.h77
-rw-r--r--compute_kernel_writer/include/ckw/KernelWriter.h217
-rw-r--r--compute_kernel_writer/include/ckw/OperandBase.h76
-rw-r--r--compute_kernel_writer/include/ckw/ScalarValue.h137
-rw-r--r--compute_kernel_writer/include/ckw/TensorOperand.h181
-rw-r--r--compute_kernel_writer/include/ckw/TensorTileSampler.h163
-rw-r--r--compute_kernel_writer/include/ckw/TileInfo.h4
-rw-r--r--compute_kernel_writer/include/ckw/TileOperand.h110
-rw-r--r--compute_kernel_writer/include/ckw/Types.h119
-rw-r--r--compute_kernel_writer/src/Kernel.cpp61
-rw-r--r--compute_kernel_writer/src/KernelWriter.cpp227
-rw-r--r--compute_kernel_writer/src/OperandBase.cpp50
-rw-r--r--compute_kernel_writer/src/Prototype.h3742
-rw-r--r--compute_kernel_writer/src/TensorOperand.cpp247
-rw-r--r--compute_kernel_writer/src/TensorTileSampler.cpp167
-rw-r--r--compute_kernel_writer/src/TileInfo.cpp2
-rw-r--r--compute_kernel_writer/src/TileOperand.cpp104
-rw-r--r--compute_kernel_writer/src/acl/AclComponentArgument.cpp97
-rw-r--r--compute_kernel_writer/src/acl/AclKernelWriter.cpp50
-rw-r--r--compute_kernel_writer/src/acl/AclScopedKernelWriter.cpp58
29 files changed, 6423 insertions, 27 deletions
diff --git a/Android.bp b/Android.bp
index cb41b1ac39..b31c9eebd9 100644
--- a/Android.bp
+++ b/Android.bp
@@ -191,9 +191,18 @@ cc_library_static {
srcs: [
"compute_kernel_writer/src/Error.cpp",
"compute_kernel_writer/src/Helpers.cpp",
+ "compute_kernel_writer/src/Kernel.cpp",
+ "compute_kernel_writer/src/KernelWriter.cpp",
+ "compute_kernel_writer/src/OperandBase.cpp",
"compute_kernel_writer/src/TensorInfo.cpp",
+ "compute_kernel_writer/src/TensorOperand.cpp",
+ "compute_kernel_writer/src/TensorTileSampler.cpp",
"compute_kernel_writer/src/TensorUtils.cpp",
"compute_kernel_writer/src/TileInfo.cpp",
+ "compute_kernel_writer/src/TileOperand.cpp",
+ "compute_kernel_writer/src/acl/AclComponentArgument.cpp",
+ "compute_kernel_writer/src/acl/AclKernelWriter.cpp",
+ "compute_kernel_writer/src/acl/AclScopedKernelWriter.cpp",
"compute_kernel_writer/src/cl/CLConstantTile.cpp",
"compute_kernel_writer/src/cl/CLHelpers.cpp",
"compute_kernel_writer/src/cl/CLTile.cpp",
diff --git a/compute_kernel_writer/.clang-format b/compute_kernel_writer/.clang-format
new file mode 100644
index 0000000000..b2b21d75f3
--- /dev/null
+++ b/compute_kernel_writer/.clang-format
@@ -0,0 +1,47 @@
+---
+Language: Cpp
+AccessModifierOffset: -4
+AlignAfterOpenBracket: Align
+AlignConsecutiveAssignments: true
+AlignConsecutiveDeclarations: true
+AlignEscapedNewlinesLeft: true
+AlignTrailingComments: true
+AllowShortBlocksOnASingleLine: false
+AllowShortCaseLabelsOnASingleLine: false
+AllowShortFunctionsOnASingleLine: None
+AllowShortIfStatementsOnASingleLine: false
+AllowShortLoopsOnASingleLine: false
+AlwaysBreakAfterDefinitionReturnType: None
+AlwaysBreakAfterReturnType: None
+AlwaysBreakBeforeMultilineStrings: true
+AlwaysBreakTemplateDeclarations: true
+BreakBeforeBinaryOperators: NonAssignment
+BreakBeforeBraces: Allman
+BreakBeforeTernaryOperators: false
+BreakConstructorInitializersBeforeComma: false
+ConstructorInitializerAllOnOneLineOrOnePerLine: true
+Cpp11BracedListStyle: false
+DerivePointerAlignment: false
+IndentCaseLabels: true
+IndentWidth: 4
+IndentWrappedFunctionNames: false
+KeepEmptyLinesAtTheStartOfBlocks: false
+MaxEmptyLinesToKeep: 1
+NamespaceIndentation: None
+PointerAlignment: Right
+SortIncludes: true
+SpaceAfterCStyleCast: false
+SpaceBeforeAssignmentOperators: true
+SpaceBeforeParens: Never
+SpaceInEmptyParentheses: false
+SpacesInAngles: false
+SpacesInCStyleCastParentheses: false
+SpacesInParentheses: false
+SpacesInSquareBrackets: false
+Standard: c++14
+TabWidth: 4
+UseTab: Never
+ReflowComments: false
+ContinuationIndentWidth: 4
+ColumnLimit: 0
+---
diff --git a/compute_kernel_writer/CMakeLists.txt b/compute_kernel_writer/CMakeLists.txt
index 93372de3db..4bf8494484 100644
--- a/compute_kernel_writer/CMakeLists.txt
+++ b/compute_kernel_writer/CMakeLists.txt
@@ -50,6 +50,7 @@ set(CMAKE_CXX_FLAGS_RELEASE "${CMAKE_CXX_FLAGS_RELEASE} -Os")
option(CKW_ENABLE_OPENCL "Enable OpenCL code generation" OFF)
option(CKW_ENABLE_ASSERTS "Enable assertions. Always enabled in Debug builds" OFF)
option(CKW_BUILD_TESTING "Build the Compute Kernel Writer validation test suite" OFF)
+option(CKW_BUILD_EXAMPLES "Build the Compute Kernel Writer examples" OFF)
option(CKW_CCACHE "Enable compiler cache builds" OFF)
#---------------------------------------------------------------------
@@ -104,7 +105,19 @@ target_sources(ckw PRIVATE
src/TensorInfo.cpp
src/TensorUtils.cpp
src/TileInfo.cpp
- )
+
+ src/Kernel.cpp
+ src/KernelWriter.cpp
+ src/OperandBase.cpp
+ src/TileOperand.cpp
+ src/TensorOperand.cpp
+ src/TensorTileSampler.cpp
+
+ src/acl/AclKernelWriter.cpp
+ src/acl/AclScopedKernelWriter.cpp
+ src/acl/AclComponentArgument.cpp
+)
+
if(CKW_ENABLE_OPENCL)
target_sources(ckw PRIVATE
src/cl/CLConstantTile.cpp
@@ -143,6 +156,19 @@ if(CKW_BUILD_TESTING)
endif()
#---------------------------------------------------------------------
+# Example
+
+if(CKW_BUILD_EXAMPLES)
+ add_executable(ckw_example_add_exp_store
+ examples/add_exp_store.cpp
+ )
+
+ target_link_libraries(ckw_example_add_exp_store
+ PUBLIC ckw
+ )
+endif()
+
+#---------------------------------------------------------------------
# Installing
install(TARGETS ckw
diff --git a/compute_kernel_writer/README.md b/compute_kernel_writer/README.md
index 8a24fe20ec..650eae2b48 100644
--- a/compute_kernel_writer/README.md
+++ b/compute_kernel_writer/README.md
@@ -82,5 +82,6 @@ This project can be configured with the following build options. Enable options
| CKW_ENABLE_OPENCL | Enable OpenCL code generation. |
| CKW_ENABLE_ASSERTS | Enable assertions. Always enabled for Debug builds. |
| CKW_BUILD_TESTING | Build the validation test suite. |
+| CKW_BUILD_EXAMPLES | Build the examples. |
| CKW_CCACHE | Use compiler cache for faster recompilation. |
| CMAKE_TOOLCHAIN_FILE | When cross-compiling, set this variable to the path of the CMake toolchain file. |
diff --git a/compute_kernel_writer/examples/add_exp_store.cpp b/compute_kernel_writer/examples/add_exp_store.cpp
new file mode 100644
index 0000000000..ab2f3e15fb
--- /dev/null
+++ b/compute_kernel_writer/examples/add_exp_store.cpp
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ckw/Error.h"
+#include "ckw/KernelWriter.h"
+#include "ckw/TensorOperand.h"
+#include "ckw/TensorTileSampler.h"
+#include "ckw/TileOperand.h"
+#include "ckw/Types.h"
+
+#include "acl/AclComponentArgument.h"
+#include "acl/AclKernelWriter.h"
+#include "acl/AclScopedKernelWriter.h"
+
+#include <iostream>
+#include <vector>
+
+using namespace ckw;
+
+TensorTileSampler create_simple_sampler(AclScopedKernelWriter writer)
+{
+ TensorTileSampler sampler;
+
+ constexpr int32_t m0 = 4;
+ constexpr int32_t n0 = 4;
+
+ auto &gid_0 = writer->declare_tile("gid_0", DataType::Int32);
+ auto &gid_1 = writer->declare_tile("gid_1", DataType::Int32);
+ auto &gid_2 = writer->declare_tile("gid_2", DataType::Int32);
+
+ auto &const_0 = writer->declare_tile("0", 0);
+
+ writer->op_get_global_id(gid_0, 0);
+ writer->op_get_global_id(gid_1, 1);
+ writer->op_get_global_id(gid_2, 2);
+
+ sampler.x(gid_0);
+ sampler.y(gid_1);
+ sampler.z(gid_2);
+ sampler.b(const_0);
+
+ sampler.width(n0);
+ sampler.height(m0);
+
+ sampler.format(TensorSamplerFormat::C_WH_1);
+ sampler.address_mode_x(TensorSamplerAddressModeX::None);
+ sampler.address_mode_y(TensorSamplerAddressModeY::ClampToBorder);
+ sampler.address_mode_z(TensorSamplerAddressModeZ::Skip);
+
+ return sampler;
+}
+
+void op_binary_elementwise(AclScopedKernelWriter writer, std::vector<AclComponentArgument *> operands)
+{
+ auto lhs = operands.at(0);
+ auto rhs = operands.at(1);
+ auto dst = operands.at(2);
+
+ // Load the LHS and RHS tile and prepare the tensor sampler.
+ if(!lhs->has_tile() && !rhs->has_tile())
+ {
+ const auto sampler = create_simple_sampler(writer);
+
+ writer->op_load_once(lhs, sampler);
+ writer->op_load_once(rhs, sampler);
+ }
+ else if(lhs->has_tile())
+ {
+ const auto &sampler = lhs->tile_sampler();
+ writer->op_load_once(rhs, sampler);
+ }
+ else
+ {
+ const auto &sampler = rhs->tile_sampler();
+ writer->op_load_once(lhs, sampler);
+ }
+
+ auto &lhs_tile = lhs->tile();
+ auto &rhs_tile = rhs->tile();
+ const auto &sampler = lhs->tile_sampler();
+
+ // Prepare the output tile.
+ if(!dst->has_tile())
+ {
+ auto &tile = writer->declare_tile("dst_tile", lhs_tile.tile_info());
+ dst->init_virtual_tensor(tile, sampler);
+ }
+
+ auto &dst_tile = dst->tile();
+
+ // Perform the operation.
+ writer->op_binary_expression(dst_tile, lhs_tile, rhs_tile, BinaryOp::Add);
+}
+
+void op_exp(AclScopedKernelWriter writer, std::vector<AclComponentArgument *> operands)
+{
+ auto src = operands.at(0);
+ auto dst = operands.at(1);
+
+ // Load the source tile and prepare the sampler.
+ if(!src->has_tile())
+ {
+ const auto sampler = create_simple_sampler(writer);
+ writer->op_load_once(src, sampler);
+ }
+
+ auto &src_tile = src->tile();
+ const auto &sampler = src->tile_sampler();
+
+ // Prepare the output tile.
+ if(!dst->has_tile())
+ {
+ auto &tile = writer->declare_tile("dst_tile", src_tile.tile_info());
+ dst->init_virtual_tensor(tile, sampler);
+ }
+
+ auto &dst_tile = dst->tile();
+
+ // Perform the operation.
+ writer->op_scalar_function(dst_tile, src_tile, ScalarUnaryFunction::Exp);
+}
+
+void op_store(AclScopedKernelWriter writer, std::vector<AclComponentArgument *> operands)
+{
+ auto src = operands.at(0);
+ auto dst = operands.at(1);
+
+ auto &src_tile = src->tile();
+ const auto &sampler = src->tile_sampler();
+ auto &dst_tensor = dst->tensor();
+
+ writer->op_store(dst_tensor, src_tile, sampler);
+}
+
+int main()
+{
+ Kernel kernel("example", GpuTargetLanguage::OpenCL);
+ AclKernelWriter root_writer(kernel);
+
+ AclScopedKernelWriter writer(&root_writer);
+
+ const TensorInfo src0_info(DataType::Fp32, TensorShape({ 3, 10, 20, 1, 1 }), TensorDataLayout::Nhwc, 0);
+ const TensorInfo src1_info(DataType::Fp32, TensorShape({ 3, 10, 20, 1, 1 }), TensorDataLayout::Nhwc, 1);
+ const TensorInfo dst_info(DataType::Fp32, TensorShape({ 3, 10, 20, 1, 1 }), TensorDataLayout::Nhwc, 2);
+
+ AclComponentArgument src0(writer->create_tensor_argument("src0", src0_info));
+ AclComponentArgument src1(writer->create_tensor_argument("src1", src1_info));
+ AclComponentArgument dst(writer->create_tensor_argument("dst", dst_info));
+
+ AclComponentArgument ans;
+
+ op_binary_elementwise(writer, { &src0, &src1, &ans });
+ op_exp(writer, { &ans, &ans });
+ op_store(writer, { &ans, &dst });
+
+ const auto code = root_writer.generate_code();
+ std::cout << code;
+
+ return 0;
+}
diff --git a/compute_kernel_writer/include/acl/AclComponentArgument.h b/compute_kernel_writer/include/acl/AclComponentArgument.h
new file mode 100644
index 0000000000..485b7a30bc
--- /dev/null
+++ b/compute_kernel_writer/include/acl/AclComponentArgument.h
@@ -0,0 +1,111 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef CKW_INCLUDE_ACL_ACLCOMPONENTARGUMENT_H
+#define CKW_INCLUDE_ACL_ACLCOMPONENTARGUMENT_H
+
+#include "ckw/TensorTileSampler.h"
+
+namespace ckw
+{
+class TensorOperand;
+class TileOperand;
+} // namespace ckw
+
+/** The argument of a dynamic fusion component which can be either user tensor or virtual tensor. */
+class AclComponentArgument
+{
+public:
+ /** Initialize a new instance of @ref AclComponentArgument class for empty virtual tensor. */
+ AclComponentArgument();
+
+ /** Initialize a new instance of @ref AclComponentArgument class for user tensor.
+ *
+ * @param[in] tensor The user tensor.
+ */
+ explicit AclComponentArgument(ckw::TensorOperand &tensor);
+
+ /** Set virtual tensor information (tile, sampler) for the argument.
+ *
+ * If the component is a user tensor, it can be treated as virtual tensor as well
+ * and won't be loaded again using @ref AclKernelWriter::op_load_once method.
+ *
+ * @param[in] tile The tile that has been loaded.
+ * @param[in] sampler The tensor sampling information that has been used to load the tile.
+ */
+ AclComponentArgument &init_virtual_tensor(ckw::TileOperand &tile, const ckw::TensorTileSampler &sampler);
+
+ /** Get whether the argument is a user tensor. */
+ bool has_tensor() const;
+
+ /** Get the tensor operand.
+ *
+ * If the tensor is not available, throw an error.
+ */
+ ckw::TensorOperand &tensor();
+
+ /** Get the tensor operand.
+ *
+ * If the tensor is not available, throw an error.
+ */
+ const ckw::TensorOperand &tensor() const;
+
+ /** Get whether the argument contains a tile.
+ *
+ * The argument can be either a user tensor that has been loaded,
+ * or a virtual tensor (i.e. a tile with tensor sampling information).
+ */
+ bool has_tile() const;
+
+ /** Get the tile operand.
+ *
+ * If the tile is not available, throw an error.
+ */
+ ckw::TileOperand &tile();
+
+ /** Get the tile operand.
+ *
+ * If the tile is not available, throw an error.
+ */
+ const ckw::TileOperand &tile() const;
+
+ /** Get the tensor sampling information for the tile.
+ *
+ * If the tile is not available, throw an error.
+ */
+ ckw::TensorTileSampler &tile_sampler();
+
+ /** Get the tensor sampling information for the tile.
+ *
+ * If the tile is not available, throw an error.
+ */
+ const ckw::TensorTileSampler &tile_sampler() const;
+
+private:
+ ckw::TensorOperand *_tensor{ nullptr };
+ ckw::TileOperand *_tile{ nullptr };
+ ckw::TensorTileSampler _tile_sampler{};
+};
+
+#endif // CKW_INCLUDE_ACL_ACLCOMPONENTARGUMENT_H
diff --git a/compute_kernel_writer/include/acl/AclKernelWriter.h b/compute_kernel_writer/include/acl/AclKernelWriter.h
new file mode 100644
index 0000000000..27b7add6ef
--- /dev/null
+++ b/compute_kernel_writer/include/acl/AclKernelWriter.h
@@ -0,0 +1,56 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef CKW_INCLUDE_ACL_ACLKERNELWRITER_H
+#define CKW_INCLUDE_ACL_ACLKERNELWRITER_H
+
+#include "ckw/KernelWriter.h"
+#include "ckw/TensorTileSampler.h"
+
+class AclComponentArgument;
+
+namespace ckw
+{
+class Kernel;
+} // namespace ckw
+
+/** Extended implementation of kernel writer for dynamic fusion. */
+class AclKernelWriter : public ckw::KernelWriter
+{
+public:
+ /** Initialize a new instance of @ref AclKernelWriter class.
+ *
+ * @param[in] kernel The kernel to be generated.
+ */
+ explicit AclKernelWriter(ckw::Kernel &kernel);
+
+ /** Load the user tensor to the tile in the same component argument if it hasn't been loaded.
+ *
+ * @param[in] tensor_or_tile The component argument that is either a user tensor or a virtual tensor.
+ * @param[in] sampler The tensor sampling information to load the tile.
+ */
+ void op_load_once(AclComponentArgument *tensor_or_tile, const ckw::TensorTileSampler &sampler);
+};
+
+#endif // CKW_INCLUDE_ACL_ACLKERNELWRITER_H
diff --git a/compute_kernel_writer/include/acl/AclScopedKernelWriter.h b/compute_kernel_writer/include/acl/AclScopedKernelWriter.h
new file mode 100644
index 0000000000..6cb957bfb5
--- /dev/null
+++ b/compute_kernel_writer/include/acl/AclScopedKernelWriter.h
@@ -0,0 +1,62 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef CKW_INCLUDE_ACL_ACLSCOPEDKERNELWRITER_H
+#define CKW_INCLUDE_ACL_ACLSCOPEDKERNELWRITER_H
+
+#include <cstdint>
+
+class AclKernelWriter;
+
+/** Helper to automatically manage kernel writer ID space. */
+class AclScopedKernelWriter
+{
+public:
+ /** Initialize a new instance of @ref AclScopedKernelWriter class. */
+ explicit AclScopedKernelWriter(AclKernelWriter *writer);
+
+ /** Create a new scope from the specified scoped kernel writer. */
+ AclScopedKernelWriter(const AclScopedKernelWriter &other);
+
+ /** Assignment is disallowed. */
+ AclScopedKernelWriter &operator=(const AclScopedKernelWriter &) = delete;
+
+ /** Access the underlying kernel writer. */
+ AclKernelWriter *operator->();
+
+ /** Access the underlying kernel writer. */
+ const AclKernelWriter *operator->() const;
+
+ /** Get the kernel writer. */
+ AclKernelWriter *writer();
+
+ /** Get the kernel writer. */
+ const AclKernelWriter *writer() const;
+
+private:
+ AclKernelWriter *_writer;
+ int32_t _parent_id_space;
+};
+
+#endif // CKW_INCLUDE_ACL_ACLSCOPEDKERNELWRITER_H
diff --git a/compute_kernel_writer/include/ckw/Error.h b/compute_kernel_writer/include/ckw/Error.h
index 996893823e..8c4853722b 100644
--- a/compute_kernel_writer/include/ckw/Error.h
+++ b/compute_kernel_writer/include/ckw/Error.h
@@ -21,11 +21,12 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef COMPUTE_KERNEL_WRITER_INCLUDE_CKW_ERROR_H
-#define COMPUTE_KERNEL_WRITER_INCLUDE_CKW_ERROR_H
-#include <string>
+#ifndef CKW_INCLUDE_CKW_ERROR_H
+#define CKW_INCLUDE_CKW_ERROR_H
+
#include <stdexcept>
+#include <string>
namespace ckw
{
@@ -44,16 +45,59 @@ std::string create_error_msg(const std::string &file, const std::string &func, c
*
* @param[in] msg Message to display.
*/
-#define COMPUTE_KERNEL_WRITER_ERROR_ON_MSG(msg) \
- do \
- { \
- const std::string arg0(__FILE__); \
- const std::string arg1(__func__); \
- const std::string arg2(std::to_string(__LINE__)); \
- const std::string arg3(msg); \
+#define COMPUTE_KERNEL_WRITER_ERROR_ON_MSG(msg) \
+ do \
+ { \
+ const std::string arg0(__FILE__); \
+ const std::string arg1(__func__); \
+ const std::string arg2(std::to_string(__LINE__)); \
+ const std::string arg3(msg); \
std::runtime_error(create_error_msg(arg0, arg1, arg2, arg3)); \
} while(false)
+/** If the condition is not met, throw an std::runtime_error with the specified message.
+ *
+ * @param[in] cond The condition that is expected to be true.
+ * @param[in] msg The error message when the condition is not met.
+ */
+#define CKW_ASSERT_MSG(cond, msg) \
+ do \
+ { \
+ if(!(cond)) \
+ { \
+ throw ::std::runtime_error(msg); \
+ } \
+ } while(false)
+
+/** If the condition is not met, throw an std::runtime_error.
+ *
+ * @param[in] cond The condition that is expected to be true.
+ */
+#define CKW_ASSERT(cond) CKW_ASSERT_MSG(cond, #cond)
+
+/** If the precondition is met but the consequence is not met, throw an std::runtime_error.
+ *
+ * @param[in] precond The condition if is met requires the consequence must also be met.
+ * @param[in] cond The condition that is expected to be true if the precondition is true.
+ */
+#define CKW_ASSERT_IF(precond, cond) \
+ CKW_ASSERT_MSG(!(precond) || ((precond) && (cond)), #precond " |-> " #cond)
+
+/** Mark the variables as unused.
+ *
+ * @param[in] ... Variables which are unused.
+ */
+#define CKW_UNUSED(...) ::ckw::ignore_unused(__VA_ARGS__) // NOLINT
+
+/** Mark the variables as unused.
+ *
+ * @param[in] ... Variables which are unused.
+ */
+template <typename... T>
+inline void ignore_unused(T &&...)
+{
+}
+
} // namespace ckw
-#endif /* COMPUTE_KERNEL_WRITER_INCLUDE_CKW_ERROR_H */
+#endif // CKW_INCLUDE_CKW_ERROR_H
diff --git a/compute_kernel_writer/include/ckw/Kernel.h b/compute_kernel_writer/include/ckw/Kernel.h
new file mode 100644
index 0000000000..cbc7700c22
--- /dev/null
+++ b/compute_kernel_writer/include/ckw/Kernel.h
@@ -0,0 +1,77 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef CKW_INCLUDE_CKW_KERNEL_H
+#define CKW_INCLUDE_CKW_KERNEL_H
+
+#include "ckw/OperandBase.h"
+#include "ckw/Types.h"
+
+#include <map>
+#include <memory>
+#include <string>
+
+namespace ckw
+{
+
+namespace prototype
+{
+class GpuKernelWriterDataHolder;
+} // namespace prototype
+
+/** The target for kernel writer to write into. */
+class Kernel
+{
+public:
+ /** Constructor
+ *
+ * @param[in] name The name of the kernel function.
+ * @param[in] language The programming language to write the kernel.
+ */
+ Kernel(const char *name, GpuTargetLanguage language);
+
+ /** Destructor */
+ ~Kernel();
+
+ /** Get the name of the kernel function. */
+ const std::string &name() const;
+
+ /** (Internal use only) Get the map from operand name to the operand declared in this kernel. */
+ const ::std::map<::std::string, ::std::unique_ptr<OperandBase>> &operands() const;
+
+ /** (Internal use only) Get the map from operand name to the operand declared in this kernel. */
+ ::std::map<::std::string, ::std::unique_ptr<OperandBase>> &operands();
+
+ /** (Internal use only) Get the implementation data. */
+ prototype::GpuKernelWriterDataHolder *impl();
+
+private:
+ ::std::string _name;
+ ::std::unique_ptr<prototype::GpuKernelWriterDataHolder> _kernel;
+ ::std::map<::std::string, ::std::unique_ptr<OperandBase>> _operands;
+};
+
+} // namespace ckw
+
+#endif // CKW_INCLUDE_CKW_KERNEL_H
diff --git a/compute_kernel_writer/include/ckw/KernelWriter.h b/compute_kernel_writer/include/ckw/KernelWriter.h
new file mode 100644
index 0000000000..99244fb1a9
--- /dev/null
+++ b/compute_kernel_writer/include/ckw/KernelWriter.h
@@ -0,0 +1,217 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef CKW_INCLUDE_CKW_KERNELWRITER_H
+#define CKW_INCLUDE_CKW_KERNELWRITER_H
+
+#include "ckw/Kernel.h"
+#include "ckw/TensorInfo.h"
+#include "ckw/TensorOperand.h"
+#include "ckw/TileInfo.h"
+#include "ckw/TileOperand.h"
+
+#include <memory>
+
+namespace ckw
+{
+
+namespace prototype
+{
+class GpuKernelWriterAttribute;
+class IGpuKernelWriter;
+} // namespace prototype
+
+/** Kernel writer. */
+class KernelWriter
+{
+public:
+ // =============================================================================================
+ // Constructors and destructor
+ // =============================================================================================
+
+ /** Initialize a new instance of kernel writer.
+ *
+ * @param[in] kernel The kernel to be written to.
+ */
+ explicit KernelWriter(Kernel &kernel);
+
+ /** Destructor */
+ ~KernelWriter();
+
+ /** No copy constructor. */
+ KernelWriter(const KernelWriter &) = delete;
+
+ /** No copy assignment. */
+ KernelWriter &operator=(const KernelWriter &) = delete;
+
+ // =============================================================================================
+ // Scope management
+ // =============================================================================================
+
+ /** Get the current ID space. */
+ int32_t id_space() const;
+
+ /** Set the current ID space. */
+ KernelWriter &id_space(int32_t id_space);
+
+ /** Switch to and return a new ID space. */
+ int32_t next_id_space();
+
+ // =============================================================================================
+ // Tensor and tile declaration
+ // =============================================================================================
+
+ /** Define a tensor argument.
+ *
+ * @param[in] name The name of the tensor.
+ * @param[in] info The tensor info.
+ *
+ * @return The @ref TensorOperand object.
+ */
+ TensorOperand &create_tensor_argument(const char *name, const TensorInfo &info);
+
+ /** Define a compile-time constant scalar argument.
+ *
+ * @param[in] name The name of the tile.
+ * @param[in] value The value of the tile.
+ *
+ * @return The @ref TileOperand object.
+ */
+ TileOperand &create_tile_argument(const char *name, int32_t value);
+
+ /** Declare a new tile.
+ *
+ * The name of the tile must be unique in the current ID space.
+ *
+ * @param[in] name The name of the tile.
+ * @param[in] ... The necessary arguments to create a new @ref TileOperand.
+ *
+ * @return The @ref TileOperand object.
+ */
+ template <typename... TArgs>
+ TileOperand &declare_tile(const char *name, TArgs &&...args)
+ {
+ const auto var_name = generate_variable_name(name);
+ auto operand = new TileOperand(var_name, ::std::forward<TArgs>(args)...);
+ register_operand(operand, true);
+
+ return *operand;
+ }
+
+ // =============================================================================================
+ // Load and store
+ // =============================================================================================
+
+ /** Load the data from the tensor memory to the tile using the sampling information.
+ *
+ * @param[out] tile The tile to be loaded.
+ * @param[in] tensor The tensor to be read.
+ * @param[in] sampler The tensor sampling information.
+ */
+ void op_load(TileOperand &tile, TensorOperand &tensor, const TensorTileSampler &sampler);
+
+ /** Store the tile to the tensor using the specified sampling information.
+ *
+ * @param[out] dst The tensor that the tile is written to.
+ * @param[in] src The tile to be stored.
+ * @param[in] sampler The tensor sampling information.
+ */
+ void op_store(TensorOperand &tensor, const TileOperand &tile, const TensorTileSampler &sampler);
+
+ // =============================================================================================
+ // Data processing
+ // =============================================================================================
+
+ /** Write assignment: `<dst> = <src>`.
+ *
+ * @param[in] dst The destination tile.
+ * @param[in] src The source tile.
+ */
+ void op_assign(TileOperand &dst, const TileOperand &src);
+
+ /** Write binary expression: `<dst> = <lhs> <op> <rhs>`.
+ *
+ * @param[in] dst The destination tile.
+ * @param[in] lhs The LHS operand.
+ * @param[in] rhs The RHS operand.
+ * @param[in] op The binary operator.
+ */
+ void op_binary_expression(TileOperand &dst, const TileOperand &lhs, const TileOperand &rhs, BinaryOp op);
+
+ /** Write function applied to scalar value: `<dst> = <func>(<src>)`.
+ *
+ * @param[in] dst The destination tile.
+ * @param[in] src The source tile.
+ * @param[in] func The function to be applied to the source tile.
+ */
+ void op_scalar_function(TileOperand &dst, const TileOperand &src, ScalarUnaryFunction func);
+
+ // =============================================================================================
+ // Misc
+ // =============================================================================================
+
+ /** Set `dst` the global ID of dimension `dim`.
+ *
+ * @param[in] dst The tile to be written to.
+ * @param[in] dim The global ID dimension.
+ */
+ void op_get_global_id(TileOperand &dst, int32_t dim);
+
+ // =============================================================================================
+ // Code generation
+ // =============================================================================================
+
+ /** Generate the source code of the kernel. */
+ ::std::string generate_code();
+
+private:
+ /** Generate the full variable name based on the original name and the ID space.
+ *
+ * @param[in] name The name of the variable.
+ *
+ * @return The full variable name.
+ */
+ ::std::string generate_variable_name(const char *name) const;
+
+ /** Register the operand to the kernel.
+ *
+ * The operand is uniquely owned by the kernel afterward.
+ *
+ * @param[in] operand The operand to be registered.
+ * @param[in] declaring Whether the tile declaration is generated.
+ */
+ void register_operand(OperandBase *operand, bool declaring);
+
+private:
+ Kernel *_kernel;
+ ::std::unique_ptr<prototype::GpuKernelWriterAttribute> _impl_attr;
+ ::std::unique_ptr<prototype::IGpuKernelWriter> _impl;
+
+ int32_t _id_space{ 0 };
+ int32_t _max_id_space{ 0 };
+};
+
+} // namespace ckw
+
+#endif // CKW_INCLUDE_CKW_KERNELWRITER_H
diff --git a/compute_kernel_writer/include/ckw/OperandBase.h b/compute_kernel_writer/include/ckw/OperandBase.h
new file mode 100644
index 0000000000..0ea5030968
--- /dev/null
+++ b/compute_kernel_writer/include/ckw/OperandBase.h
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef CKW_INCLUDE_CKW_OPERANDBASE_H
+#define CKW_INCLUDE_CKW_OPERANDBASE_H
+
+#include "ckw/Types.h"
+#include <string>
+
+namespace ckw
+{
+namespace prototype
+{
+class IGpuKernelWriter;
+class Operand;
+} // namespace prototype
+
+/** The base class for all operands. */
+class OperandBase
+{
+public:
+ /** Constructor
+ *
+ * @param[in] name The name of the operand.
+ */
+ explicit OperandBase(const ::std::string &name);
+
+ /** Destructor */
+ virtual ~OperandBase();
+
+ /** (Internal use only) Create the implementation operand.
+ *
+ * @param[in] writer The implementation kernel writer.
+ */
+ virtual prototype::Operand create_impl_operand(prototype::IGpuKernelWriter *writer) const = 0;
+
+ /** Get the name of the operand. */
+ const ::std::string &name() const;
+
+ /** Set the name of the operand. */
+ OperandBase &name(const ::std::string &name);
+
+ /** Get the data type of the operand. */
+ virtual DataType data_type() const = 0;
+
+ /** Get whether the operand is compile-time constant. */
+ virtual bool is_constant() const = 0;
+
+private:
+ ::std::string _name;
+};
+
+} // namespace ckw
+
+#endif // CKW_INCLUDE_CKW_OPERANDBASE_H
diff --git a/compute_kernel_writer/include/ckw/ScalarValue.h b/compute_kernel_writer/include/ckw/ScalarValue.h
new file mode 100644
index 0000000000..cf017d435f
--- /dev/null
+++ b/compute_kernel_writer/include/ckw/ScalarValue.h
@@ -0,0 +1,137 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef CKW_INCLUDE_CKW_SCALARVALUE_H
+#define CKW_INCLUDE_CKW_SCALARVALUE_H
+
+#include "ckw/Error.h"
+
+#include <cstdint>
+
+namespace ckw
+{
+
+/** The scalar value known at compile-time. */
+class ScalarValue
+{
+public:
+ /** Initialize a new instance of @ref ScalarValue class with integer value 0. */
+ ScalarValue()
+ {
+ _type = Type::INT;
+ _value.i64 = 0;
+ }
+
+ /** Initialize a new instance of @ref ScalarValue class with the specified value. */
+ template <typename T>
+ ScalarValue(T value)
+ {
+ set(value);
+ }
+
+ /** Set the value. */
+ template <typename T>
+ void set(T value)
+ {
+ CKW_ASSERT(::std::is_integral<T>::value || ::std::is_floating_point<T>::value);
+ CKW_ASSERT(sizeof(T) <= 8);
+
+ _size = sizeof(T);
+
+ if(::std::is_integral<T>::value)
+ {
+ if(::std::is_signed<T>::value)
+ {
+ _type = Type::INT;
+ _value.i64 = value;
+ }
+ else
+ {
+ _type = Type::UINT;
+ _value.u64 = value;
+ }
+ }
+ else
+ {
+ _type = Type::FLOAT;
+ _value.f64 = value;
+ }
+ }
+
+ /** Get the value.
+ *
+ * The caller must make sure that what has been stored in the object must fit
+ * the output data type without data corruption or loss of accuracy.
+ */
+ template <typename T>
+ T get() const
+ {
+ CKW_ASSERT(::std::is_integral<T>::value || ::std::is_floating_point<T>::value);
+ CKW_ASSERT(sizeof(T) >= _size);
+
+ if(::std::is_integral<T>::value)
+ {
+ if(::std::is_signed<T>::value)
+ {
+ CKW_ASSERT(_type == Type::INT || _type == Type::UINT);
+ CKW_ASSERT_IF(_type == Type::UINT, sizeof(T) > _size);
+
+ return _value.i64;
+ }
+ else
+ {
+ CKW_ASSERT(_type == Type::INT);
+
+ return _value.u64;
+ }
+ }
+ else
+ {
+ return _value.f64;
+ }
+ }
+
+private:
+ union Value
+ {
+ int64_t i64;
+ uint64_t u64;
+ double f64;
+ };
+
+ enum class Type : int32_t
+ {
+ UINT,
+ INT,
+ FLOAT,
+ };
+
+ Value _value{};
+ Type _type{};
+ uint32_t _size{};
+};
+
+} // namespace ckw
+
+#endif // CKW_INCLUDE_CKW_SCALARVALUE_H
diff --git a/compute_kernel_writer/include/ckw/TensorOperand.h b/compute_kernel_writer/include/ckw/TensorOperand.h
new file mode 100644
index 0000000000..130ab596fb
--- /dev/null
+++ b/compute_kernel_writer/include/ckw/TensorOperand.h
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef CKW_INCLUDE_CKW_TENSOROPERAND_H
+#define CKW_INCLUDE_CKW_TENSOROPERAND_H
+
+#include "ckw/OperandBase.h"
+#include "ckw/TensorInfo.h"
+#include "ckw/TensorTileSampler.h"
+#include "ckw/TileOperand.h"
+#include "ckw/Types.h"
+
+#include <memory>
+
+namespace ckw
+{
+
+class TensorComponentOperand;
+
+// =================================================================================================
+// TensorOperand
+// =================================================================================================
+
+/** Tensor operand */
+class TensorOperand : public OperandBase
+{
+public:
+ /** Initialize a new instance of @ref TensorOperand class.
+ *
+ * @param[in] name The name of the tensor.
+ * @param[in] info The tensor info.
+ */
+ TensorOperand(const ::std::string &name, const TensorInfo &info);
+
+ /** No copy constructor. */
+ TensorOperand(const TensorOperand &other) = delete;
+
+ /** No copy assignment. */
+ TensorOperand &operator=(const TensorOperand &other) = delete;
+
+ /** (Internal use only) Create the implementation operand.
+ *
+ * @param[in] writer The implementation kernel writer.
+ */
+ virtual prototype::Operand create_impl_operand(prototype::IGpuKernelWriter *writer) const override;
+
+ /** Get the tensor info. */
+ const TensorInfo &info() const;
+
+ /** Get the tensor info. */
+ TensorInfo &info();
+
+ /** Get the data type. */
+ virtual DataType data_type() const override;
+
+ /** Get whether the tensor is compile-time constant. */
+ virtual bool is_constant() const override;
+
+ /** Get the default tile attached to the tensor. */
+ const TileOperand &tile() const;
+
+ /** Get the default tile attached to the tensor. */
+ TileOperand &tile();
+
+ /** Set the default tile attached to the tensor. */
+ TensorOperand &tile(TileOperand &tile);
+
+ /** Get the tensor sampler of the default tile. */
+ const TensorTileSampler &tile_sampler() const;
+
+ /** Get the tensor sampler of the default tile. */
+ TensorTileSampler &tile_sampler();
+
+ /** Set the tensor sampler of the default tile. */
+ TensorOperand &tile_sampler(const TensorTileSampler &value);
+
+ /** Get the operand that contains the stride in y dimension of the tensor. */
+ TileOperand &stride1();
+
+ /** Get the operand that contains the stride in z dimension of the tensor. */
+ TileOperand &stride2();
+
+ /** Get the operand that contains the stride in w dimension of the tensor. */
+ TileOperand &stride3();
+
+ /** Get the operand that contains the stride in w dimension of the tensor. */
+ TileOperand &stride4();
+
+ /** Get the operand that contains the size of dimension 0 of the tensor. */
+ TileOperand &dim0();
+
+ /** Get the operand that contains the size of dimension 1 of the tensor. */
+ TileOperand &dim1();
+
+ /** Get the operand that contains the size of dimension 2 of the tensor. */
+ TileOperand &dim2();
+
+ /** Get the operand that contains the size of dimension 3 of the tensor. */
+ TileOperand &dim3();
+
+ /** Get the operand that contains the size of dimension 4 of the tensor. */
+ TileOperand &dim4();
+
+ /** Get the operand that contains the size of dimensions 1 and 2 collapsed. */
+ TileOperand &dim1_dim2();
+
+ /** Get the operand that contains the size of dimensions 1, 2 and 3 collapsed. */
+ TileOperand &dim1_dim2_dim3();
+
+ /** Get the operand that contains the offset in bytes to the first element. */
+ TileOperand &offset_first_element_in_bytes();
+
+private:
+ TensorInfo _info;
+
+ TileOperand *_tile{ nullptr };
+ TensorTileSampler _tile_sampler{};
+
+ ::std::unique_ptr<TensorComponentOperand> _stride1{ nullptr };
+ ::std::unique_ptr<TensorComponentOperand> _stride2{ nullptr };
+ ::std::unique_ptr<TensorComponentOperand> _stride3{ nullptr };
+ ::std::unique_ptr<TensorComponentOperand> _stride4{ nullptr };
+ ::std::unique_ptr<TensorComponentOperand> _dim0{ nullptr };
+ ::std::unique_ptr<TensorComponentOperand> _dim1{ nullptr };
+ ::std::unique_ptr<TensorComponentOperand> _dim2{ nullptr };
+ ::std::unique_ptr<TensorComponentOperand> _dim3{ nullptr };
+ ::std::unique_ptr<TensorComponentOperand> _dim4{ nullptr };
+ ::std::unique_ptr<TensorComponentOperand> _dim1_dim2{ nullptr };
+ ::std::unique_ptr<TensorComponentOperand> _dim1_dim2_dim3{ nullptr };
+ ::std::unique_ptr<TensorComponentOperand> _offset_first_element_in_bytes{ nullptr };
+};
+
+// =================================================================================================
+// TensorComponentOperand
+// =================================================================================================
+
+/** Tile operand that contains tensor information. */
+class TensorComponentOperand : public TileOperand
+{
+public:
+ /** Initialize a new instance of @ref TensorComponentOperand class.
+ *
+ * @param[in] name The name of the operand.
+ * @param[in] component The tensor info component.
+ */
+ TensorComponentOperand(const ::std::string &name, TensorComponent component);
+
+ /** (Internal use only) Create the implementation operand.
+ *
+ * @param[in] writer The implementation kernel writer.
+ */
+ virtual prototype::Operand create_impl_operand(prototype::IGpuKernelWriter *writer) const override;
+
+private:
+ TensorComponent _component;
+};
+
+} // namespace ckw
+
+#endif // CKW_INCLUDE_CKW_TENSOROPERAND_H
diff --git a/compute_kernel_writer/include/ckw/TensorTileSampler.h b/compute_kernel_writer/include/ckw/TensorTileSampler.h
new file mode 100644
index 0000000000..5ef7bca647
--- /dev/null
+++ b/compute_kernel_writer/include/ckw/TensorTileSampler.h
@@ -0,0 +1,163 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef CKW_INCLUDE_CKW_TENSORTILESAMPLER_H
+#define CKW_INCLUDE_CKW_TENSORTILESAMPLER_H
+
+#include "ckw/Types.h"
+#include <functional>
+
+namespace ckw
+{
+
+class TileOperand;
+
+/** Tensor sampler
+ *
+ * It contains information about how the result tile should be stored to tensor memory.
+ * It can also be used to dictate how the subsequent operators fetch the input tensor.
+ */
+class TensorTileSampler
+{
+public:
+ /** Initialize a new instance of @ref TensorSampler class. */
+ TensorTileSampler();
+
+ /** Initialize a new instance of @ref TensorSampler class.
+ *
+ * @param[in] x The coordinate in the x dimension.
+ * @param[in] y The coordinate in the y dimension.
+ * @param[in] z The coordinate in the z dimension.
+ * @param[in] b The coordinate in the batch dimension.
+ * @param[in] format The tensor data format.
+ * @param[in] address_mode_x The address mode of the x dimension.
+ * @param[in] address_mode_y The address mode of the y dimension.
+ * @param[in] address_mode_z The address mode of the z dimension.
+ */
+ TensorTileSampler(
+ TileOperand &x, TileOperand &y, TileOperand &z, TileOperand &b,
+ TensorSamplerFormat format,
+ TensorSamplerAddressModeX address_mode_x,
+ TensorSamplerAddressModeY address_mode_y,
+ TensorSamplerAddressModeZ address_mode_z);
+
+ /** Initialize a new instance of @ref TensorSampler class.
+ *
+ * @param[in] x The coordinate in the x dimension.
+ * @param[in] y The coordinate in the y dimension.
+ * @param[in] z The coordinate in the z dimension.
+ * @param[in] b The coordinate in the batch dimension.
+ * @param[in] height The height of the tile.
+ * @param[in] width The width of the tile.
+ * @param[in] format The tensor data format.
+ * @param[in] address_mode_x The address mode of the x dimension.
+ * @param[in] address_mode_y The address mode of the y dimension.
+ * @param[in] address_mode_z The address mode of the z dimension.
+ */
+ TensorTileSampler(
+ TileOperand &x, TileOperand &y, TileOperand &z, TileOperand &b,
+ int32_t height, int32_t width,
+ TensorSamplerFormat format,
+ TensorSamplerAddressModeX address_mode_x,
+ TensorSamplerAddressModeY address_mode_y,
+ TensorSamplerAddressModeZ address_mode_z);
+
+ /** Get the coordinate in the x dimension. */
+ const TileOperand &x() const;
+
+ /** Set the coordinate in the x dimension. */
+ TensorTileSampler &x(TileOperand &x);
+
+ /** Get the coordinate in the y dimension. */
+ const TileOperand &y() const;
+
+ /** Set the coordinate in the y dimension. */
+ TensorTileSampler &y(TileOperand &y);
+
+ /** Get the coordinate in the z dimension. */
+ const TileOperand &z() const;
+
+ /** Set the coordinate in the z dimension. */
+ TensorTileSampler &z(TileOperand &z);
+
+ /** Get the coordinate in the batch dimension. */
+ const TileOperand &b() const;
+
+ /** Set the coordinate in the batch dimension. */
+ TensorTileSampler &b(TileOperand &b);
+
+ /** Get the width of the tile. */
+ int32_t width() const;
+
+ /** Set the width of the tile. */
+ TensorTileSampler &width(int32_t width);
+
+ /** Get the height of the tile. */
+ int32_t height() const;
+
+ /** Set the height of the tile. */
+ TensorTileSampler &height(int32_t height);
+
+ /** Get the format of the tensor. */
+ TensorSamplerFormat format() const;
+
+ /** Set the format of the tensor. */
+ TensorTileSampler &format(TensorSamplerFormat format);
+
+ /** Get the address mode of the x dimension. */
+ TensorSamplerAddressModeX address_mode_x() const;
+
+ /** Set the address mode of the x-dimension. */
+ TensorTileSampler &address_mode_x(TensorSamplerAddressModeX address_mode_x);
+
+ /** Get the address mode of the y dimension. */
+ TensorSamplerAddressModeY address_mode_y() const;
+
+ /** Set the address mode of the y dimension. */
+ TensorTileSampler &address_mode_y(TensorSamplerAddressModeY address_mode_y);
+
+ /** Get the address mode of the z dimension. */
+ TensorSamplerAddressModeZ address_mode_z() const;
+
+ /** Set the address mode of the z dimension. */
+ TensorTileSampler &address_mode_z(TensorSamplerAddressModeZ address_mode_z);
+
+private:
+ TileOperand *_x{ nullptr };
+ TileOperand *_y{ nullptr };
+ TileOperand *_z{ nullptr };
+ TileOperand *_b{ nullptr };
+
+ int32_t _height{ 0 };
+ int32_t _width{ 0 };
+
+ TensorSamplerFormat _format{ TensorSamplerFormat::Unknown };
+ TensorSamplerAddressModeX _address_mode_x{ TensorSamplerAddressModeX::Unknown };
+ TensorSamplerAddressModeY _address_mode_y{ TensorSamplerAddressModeY::Unknown };
+ TensorSamplerAddressModeZ _address_mode_z{ TensorSamplerAddressModeZ::Unknown };
+};
+
+} // namespace ckw
+
+#endif // CKW_INCLUDE_CKW_TENSORTILESAMPLER_H
diff --git a/compute_kernel_writer/include/ckw/TileInfo.h b/compute_kernel_writer/include/ckw/TileInfo.h
index 5f9d037a66..06c910c9fd 100644
--- a/compute_kernel_writer/include/ckw/TileInfo.h
+++ b/compute_kernel_writer/include/ckw/TileInfo.h
@@ -57,10 +57,10 @@ public:
/** Constructor used to initialize a tile with a given data type and tile sizes.
*
* @param[in] dt Tile data type
- * @param[in] w Tile width
* @param[in] h Tile height
+ * @param[in] w Tile width
*/
- TileInfo(DataType dt, int32_t w, int32_t h);
+ TileInfo(DataType dt, int32_t h, int32_t w);
/** Set width */
TileInfo &width(int32_t w);
/** Get width */
diff --git a/compute_kernel_writer/include/ckw/TileOperand.h b/compute_kernel_writer/include/ckw/TileOperand.h
new file mode 100644
index 0000000000..1eee18589f
--- /dev/null
+++ b/compute_kernel_writer/include/ckw/TileOperand.h
@@ -0,0 +1,110 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef CKW_INCLUDE_CKW_TILEOPERAND_H
+#define CKW_INCLUDE_CKW_TILEOPERAND_H
+
+#include "ckw/Error.h"
+#include "ckw/OperandBase.h"
+#include "ckw/ScalarValue.h"
+#include "ckw/TileInfo.h"
+
+#include <vector>
+
+namespace ckw
+{
+
+class Kernel;
+
+/** Tile operand which can be either scalar, vector or 2D tile. */
+class TileOperand : public OperandBase
+{
+public:
+ /** Initialize a new instance of @ref TileOperand class with the tile information.
+ *
+ * @param[in] name The name of the tile.
+ * @param[in] tile_info The tile info.
+ */
+ TileOperand(const ::std::string &name, const TileInfo &tile_info);
+
+ /** Initialize a new instance of @ref TileOperand for scalar variable.
+ *
+ * @param[in] name The name of the tile.
+ * @param[in] data_type The data type of the tile.
+ */
+ TileOperand(const ::std::string &name, DataType data_type);
+
+ /** Initialize a new instance of @ref TileOperand for compile-time constant scalar variable.
+ *
+ * @param[in] name The name of the tile.
+ * @param[in] value The value of the tile.
+ */
+ TileOperand(const ::std::string &name, int32_t value);
+
+ /** Initialize a new instance of @ref TileOperand for compile-time constant scalar variable.
+ *
+ * @param[in] name The name of the tile.
+ * @param[in] value The value of the tile.
+ */
+ TileOperand(const ::std::string &name, float value);
+
+ /** Prohibit copy of tile operand. */
+ TileOperand(const TileOperand &) = delete;
+
+ /** Prohibit copy of tile operand. */
+ TileOperand &operator=(const TileOperand &) = delete;
+
+ /** (Internal use only) Create the implementation operand.
+ *
+ * @param[in] writer The implementation kernel writer.
+ */
+ virtual prototype::Operand create_impl_operand(prototype::IGpuKernelWriter *writer) const override;
+
+ /** Get the tile info. */
+ const TileInfo &tile_info() const;
+
+ /** Get the data type of the tile. */
+ virtual DataType data_type() const override;
+
+ /** Get whether the tile is compile-time constant. */
+ virtual bool is_constant() const override;
+
+ /** Get whether the tile is a scalar value. */
+ bool is_scalar() const;
+
+ /** Get the scalar value of the tile.
+ *
+ * The tile must have the shape of 1, 1 (i.e. scalar).
+ */
+ ScalarValue scalar_value() const;
+
+private:
+ TileInfo _info;
+ ScalarValue _value{};
+ bool _constant;
+};
+
+} // namespace ckw
+
+#endif // CKW_INCLUDE_CKW_TILEOPERAND_H
diff --git a/compute_kernel_writer/include/ckw/Types.h b/compute_kernel_writer/include/ckw/Types.h
index c9f80b65e0..5516718e54 100644
--- a/compute_kernel_writer/include/ckw/Types.h
+++ b/compute_kernel_writer/include/ckw/Types.h
@@ -21,25 +21,120 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
-#ifndef COMPUTE_KERNEL_WRITER_INCLUDE_CKW_TYPES_H
-#define COMPUTE_KERNEL_WRITER_INCLUDE_CKW_TYPES_H
+
+#ifndef CKW_INCLUDE_CKW_TYPES_H
+#define CKW_INCLUDE_CKW_TYPES_H
+
+#include <array>
+#include <cstdint>
namespace ckw
{
+
/** Compute Kernel Writer data types. This data type is used by the code variables and tensor arguments. */
enum class DataType
{
+ Unknown = 0x00,
+ Fp32 = 0x11,
+ Fp16 = 0x12,
+ Int32 = 0x21,
+ Int16 = 0x22,
+ Int8 = 0x24,
+ Uint32 = 0x31,
+ Uint16 = 0x32,
+ Uint8 = 0x34,
+ Bool = 0x41
+};
+
+enum class GpuTargetLanguage
+{
Unknown,
- Fp32,
- Fp16,
- Int32,
- Int16,
- Int8,
- Uint32,
- Uint16,
- Uint8,
- Bool
+ OpenCL
+};
+
+/* Binary operations
+*/
+enum class BinaryOp : int32_t
+{
+ // Elementwise
+ Add = 0x0000, // +
+ Sub = 0x0001, // -
+ Mul = 0x0002, // *
+ Div = 0x0003, // /
+ Mod = 0x0004, // %
+ // Relational
+ Equal = 0x1000, // ==
+ Less = 0x1001, // <
+ LessEqual = 0x1002, // <=
+ Greater = 0x1003, // >
+ GreaterEqual = 0x1004, // >=
+ // Algebra
+ MatMul_Nt_Nt = 0x2000, // X
+ MatMul_Nt_T = 0x2001, // X
+ MatMul_T_Nt = 0x2002, // X
+ MatMul_T_T = 0x2003, // X
+ Dot = 0x2004, // .
+ // Logical
+ LogicalAnd = 0x3000, // &&
+ LogicalOr = 0x3001, // ||
+ LogicalNot = 0x3002 // !
+};
+
+enum class AssignmentOp : int32_t
+{
+ // Unary
+ Increment = 0x0000, // +=
+ Decrement = 0x0001, // -=
+};
+
+enum class ScalarUnaryFunction : int32_t
+{
+ Exp,
+};
+
+enum class TensorSamplerFormat : int32_t
+{
+ Unknown = 0,
+ C_WH_1 = 1,
+ C_W_H = 2
+};
+
+enum class TensorSamplerAddressModeX : int32_t
+{
+ Unknown = 0,
+ None = 1, // The user guarantees that the X coordinate is always in-bound
+ OverlappingMin = 2 // (FIXED shapes only) Reduce the load/store length when x == 0 (MIN). The load length will be width % original length
+ // Leftover elements can be handled using overlapping. This involves processing some of the elements in the array twice.
};
+
+enum class TensorSamplerAddressModeY : int32_t
+{
+ Unknown = 0,
+ None = 1, // The user guarantees that the Y coordinate is always in-bound
+ OverlappingMin = 2, // (FIXED shapes only) Reduce the load/store length when x == 0 (MIN). The load length will be width % original length
+ Skip = 3, // Skip the read/write
+ SkipMinEdgeOnly = 4, // Skip greater than or equal to max only. The user guarantees that the Y coordinate is always >= 0
+ SkipMaxEdgeOnly = 5, // Skip less than 0 only
+ ClampToNearest = 6, // Clamp the coordinate to nearest edge (0 or max value allowed on Y)
+ ClampToMinEdgeOnly = 7, // Clamp the negative coordinate to 0 only. Therefore, we expect Y to be always < MAX
+ ClampToMaxEdgeOnly = 8, // Clamp the coordinate to the max value allowed on Y only. We expect Y to be always >= 0
+ ClampToBorder = 9, // Clamp to border which always has 0 value
+ ClampToBorderMinEdgeOnly = 10,
+ ClampToBorderMaxEdgeOnly = 11
+};
+
+enum class TensorSamplerAddressModeZ : int32_t
+{
+ Unknown = 0,
+ None = 1, // The user guarantees that the Y coordinate is always in-bound
+ Skip = 3, // Skip the read/write
+ SkipMinEdgeOnly = 4, // Skip greater than or equal to max only. The user guarantees that the Y coordinate is always >= 0
+ SkipMaxEdgeOnly = 5, // Skip less than 0 only
+ ClampToNearest = 6, // Clamp the coordinate to nearest edge (0 or max value allowed on Y)
+ ClampToMinEdgeOnly = 7, // Clamp the negative coordinate to 0 only. Therefore, we expect Y to be always < MAX
+ ClampToMaxEdgeOnly = 8, // Clamp the coordinate to the max value allowed on Y only. We expect Y to be always >= 0
+};
+
} // namespace ckw
-#endif /* COMPUTE_KERNEL_WRITER_INCLUDE_CKW_TYPES_H */
+#endif // CKW_INCLUDE_CKW_TYPES_H
diff --git a/compute_kernel_writer/src/Kernel.cpp b/compute_kernel_writer/src/Kernel.cpp
new file mode 100644
index 0000000000..bbf5c440a7
--- /dev/null
+++ b/compute_kernel_writer/src/Kernel.cpp
@@ -0,0 +1,61 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ckw/Kernel.h"
+#include "ckw/Types.h"
+#include "src/Prototype.h"
+
+namespace ckw
+{
+
+Kernel::Kernel(const char *name, GpuTargetLanguage language)
+ : _name(name), _kernel(std::make_unique<prototype::GpuKernelWriterDataHolder>(language)), _operands{}
+{
+}
+
+Kernel::~Kernel()
+{
+}
+
+const std::string &Kernel::name() const
+{
+ return _name;
+}
+
+const std::map<std::string, std::unique_ptr<OperandBase>> &Kernel::operands() const
+{
+ return _operands;
+}
+
+std::map<std::string, std::unique_ptr<OperandBase>> &Kernel::operands()
+{
+ return _operands;
+}
+
+prototype::GpuKernelWriterDataHolder *Kernel::impl()
+{
+ return _kernel.get();
+}
+
+} // namespace ckw
diff --git a/compute_kernel_writer/src/KernelWriter.cpp b/compute_kernel_writer/src/KernelWriter.cpp
new file mode 100644
index 0000000000..28538e7893
--- /dev/null
+++ b/compute_kernel_writer/src/KernelWriter.cpp
@@ -0,0 +1,227 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ckw/KernelWriter.h"
+#include "ckw/Error.h"
+#include "ckw/TensorOperand.h"
+#include "src/Prototype.h"
+
+#include <sstream>
+
+namespace ckw
+{
+
+namespace
+{
+
+inline prototype::TensorInfo create_impl_tensor_info(const TensorInfo &info)
+{
+ return prototype::TensorInfo{ info.shape(), info.data_type(), info.data_layout(), info.id() };
+}
+
+} // namespace
+
+// =================================================================================================
+// Constructors and destructor
+// =================================================================================================
+
+KernelWriter::KernelWriter(Kernel &kernel)
+ : _kernel(&kernel),
+ _impl_attr(std::make_unique<prototype::GpuKernelWriterAttribute>()),
+ _impl(prototype::GpuKernelWriterFactory::create(_impl_attr.get(), kernel.impl()))
+{
+ _impl->set_IdSpace(1);
+}
+
+KernelWriter::~KernelWriter()
+{
+}
+
+// =================================================================================================
+// Scope management
+// =================================================================================================
+
+int32_t KernelWriter::id_space() const
+{
+ return _id_space;
+}
+
+KernelWriter &KernelWriter::id_space(int32_t id_space)
+{
+ CKW_ASSERT(id_space <= _max_id_space);
+
+ _id_space = id_space;
+ return *this;
+}
+
+int32_t KernelWriter::next_id_space()
+{
+ id_space(++_max_id_space);
+ return _id_space;
+}
+
+// =================================================================================================
+// Tensor and tile declaration
+// =================================================================================================
+
+TensorOperand &KernelWriter::create_tensor_argument(const char *name, const TensorInfo &info)
+{
+ const auto var_name = generate_variable_name(name);
+
+ _impl->declare_argument(var_name, create_impl_tensor_info(info));
+
+ auto operand = new TensorOperand(var_name, info);
+ register_operand(operand, false);
+
+ return *operand;
+}
+
+TileOperand &KernelWriter::create_tile_argument(const char *name, int32_t value)
+{
+ const auto var_name = generate_variable_name(name);
+
+ auto operand = new TileOperand(var_name, value);
+ register_operand(operand, false);
+
+ return *operand;
+}
+
+std::string KernelWriter::generate_variable_name(const char *name) const
+{
+ std::stringstream var_name;
+
+ var_name << "_" << _id_space << "_" << name;
+
+ return var_name.str();
+}
+
+void KernelWriter::register_operand(OperandBase *operand, bool declaring)
+{
+ const auto &name = operand->name();
+ auto &operands = _kernel->operands();
+
+ CKW_ASSERT(operands.find(name) == operands.end());
+ operands[name] = std::unique_ptr<OperandBase>(operand);
+
+ if(declaring && !operand->is_constant())
+ {
+ const auto tile = reinterpret_cast<TileOperand *>(operand);
+
+ const auto &info = tile->tile_info();
+ _impl->declare_tile(tile->name(), prototype::TileInfo(info.data_type(), info.width(), info.height()));
+ }
+}
+
+// =================================================================================================
+// Load and store
+// =================================================================================================
+
+void KernelWriter::op_load(TileOperand &tile, TensorOperand &tensor, const TensorTileSampler &sampler)
+{
+ auto impl_tensor = prototype::TensorOperand(
+ tensor.name(),
+ prototype::GpuSampler{
+ sampler.format(),
+ prototype::GpuSamplerTensorStorage::BufferUint8Ptr,
+ sampler.address_mode_x(),
+ sampler.address_mode_y(),
+ sampler.address_mode_z() });
+
+ auto impl_x = sampler.x().create_impl_operand(_impl.get());
+ auto impl_y = sampler.y().create_impl_operand(_impl.get());
+ auto impl_z = sampler.z().create_impl_operand(_impl.get());
+ auto impl_b = sampler.b().create_impl_operand(_impl.get());
+
+ auto impl_dst = tile.create_impl_operand(_impl.get());
+
+ _impl->op_load_immediate(impl_tensor, impl_dst, impl_x, impl_y, impl_z, impl_b);
+}
+
+void KernelWriter::op_store(TensorOperand &tensor, const TileOperand &tile, const TensorTileSampler &sampler)
+{
+ auto impl_tensor = prototype::TensorOperand(
+ tensor.name(),
+ prototype::GpuSampler{
+ sampler.format(),
+ prototype::GpuSamplerTensorStorage::BufferUint8Ptr,
+ sampler.address_mode_x(),
+ sampler.address_mode_y(),
+ sampler.address_mode_z() });
+ auto impl_src = tile.create_impl_operand(_impl.get());
+ auto impl_x = sampler.x().create_impl_operand(_impl.get());
+ auto impl_y = sampler.y().create_impl_operand(_impl.get());
+ auto impl_z = sampler.z().create_impl_operand(_impl.get());
+ auto impl_b = sampler.b().create_impl_operand(_impl.get());
+
+ _impl->op_store_immediate(impl_tensor, impl_src, impl_x, impl_y, impl_z, impl_b);
+}
+
+// =================================================================================================
+// Data processing
+// =================================================================================================
+
+void KernelWriter::op_assign(TileOperand &dst, const TileOperand &src)
+{
+ auto impl_dst = dst.create_impl_operand(_impl.get());
+ auto impl_src = src.create_impl_operand(_impl.get());
+
+ _impl->op_assign(impl_dst, impl_src);
+}
+
+void KernelWriter::op_binary_expression(TileOperand &dst, const TileOperand &lhs, const TileOperand &rhs, BinaryOp op)
+{
+ auto impl_lhs = lhs.create_impl_operand(_impl.get());
+ auto impl_rhs = rhs.create_impl_operand(_impl.get());
+ auto impl_dst = dst.create_impl_operand(_impl.get());
+
+ _impl->op_binary_expression(impl_dst, impl_lhs, op, impl_rhs);
+}
+
+void KernelWriter::op_scalar_function(TileOperand &dst, const TileOperand &src, ScalarUnaryFunction opcode)
+{
+ auto impl_dst = dst.create_impl_operand(_impl.get());
+ auto impl_src = src.create_impl_operand(_impl.get());
+
+ _impl->op_scalar_function(impl_dst, impl_src, opcode);
+}
+
+// =================================================================================================
+// Misc
+// =================================================================================================
+
+void KernelWriter::op_get_global_id(TileOperand &dst, int32_t dim)
+{
+ _impl->op_get_global_id(prototype::Operand(dst.name()), dim);
+}
+
+// =================================================================================================
+// Code generation
+// =================================================================================================
+
+std::string KernelWriter::generate_code()
+{
+ return prototype::generate_code(*_kernel->impl(), _kernel->name());
+}
+
+} // namespace ckw
diff --git a/compute_kernel_writer/src/OperandBase.cpp b/compute_kernel_writer/src/OperandBase.cpp
new file mode 100644
index 0000000000..59cf846cc7
--- /dev/null
+++ b/compute_kernel_writer/src/OperandBase.cpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ckw/OperandBase.h"
+
+namespace ckw
+{
+
+OperandBase::OperandBase(const std::string &name)
+ : _name(name)
+{
+}
+
+OperandBase::~OperandBase()
+{
+}
+
+const std::string &OperandBase::name() const
+{
+ return _name;
+}
+
+OperandBase &OperandBase::name(const std::string &name)
+{
+ _name = name;
+ return *this;
+}
+
+} // namespace ckw
diff --git a/compute_kernel_writer/src/Prototype.h b/compute_kernel_writer/src/Prototype.h
new file mode 100644
index 0000000000..f113a0bfbc
--- /dev/null
+++ b/compute_kernel_writer/src/Prototype.h
@@ -0,0 +1,3742 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef CKW_SRC_PROTOTYPE_H
+#define CKW_SRC_PROTOTYPE_H
+
+#include <vector>
+#include <map>
+#include <string>
+#include <cstdint> // int32_t
+#include <iostream> // cout (to be removed)
+#include <cassert> // assert (to be removed)
+#include <unordered_map>
+#include <chrono>
+#include <cmath>
+#include <memory>
+#include <algorithm>
+#include <array>
+#include <stdexcept>
+
+#include "ckw/Types.h"
+#include "ckw/TensorInfo.h"
+#include "ckw/Error.h"
+
+namespace ckw
+{
+namespace prototype {
+
+// Dummy data structure for Size2D
+using Size2D = std::vector<int32_t>;
+
+// Dummy Status
+using Status = void;
+
+enum class ComponentType : int32_t
+{
+ Complex = 0,
+ Simple = 1,
+ Unfusable = 2
+};
+
+enum class GpuCompilationSpeed
+{
+ Fast = 0x00, // fast compilation may increase the latency of the network
+ Slow = 0x01 // slow compilation may decrease the latency of the network
+};
+
+enum class GpuExtensions
+{
+ Fp16,
+ Dot8,
+ Mmul,
+ FastMath
+};
+
+struct TensorInfo
+{
+ TensorShape shape { {0} };
+ DataType data_type { DataType::Unknown };
+ TensorDataLayout data_layout { TensorDataLayout::Nhwc };
+ int32_t id { -1 };
+};
+
+struct ComponentAttribute
+{
+ GpuCompilationSpeed compilation_speed {GpuCompilationSpeed::Fast};
+ bool overwrite_tile { true };
+};
+
+inline std::string data_type_to_cl_type(DataType dt)
+{
+ switch(dt)
+ {
+ case DataType::Fp32:
+ return "float";
+ case DataType::Fp16:
+ return "half";
+ case DataType::Int8:
+ return "char";
+ case DataType::Uint8:
+ return "uchar";
+ case DataType::Uint16:
+ return "ushort";
+ case DataType::Int16:
+ return "short";
+ case DataType::Uint32:
+ return "uint";
+ case DataType::Int32:
+ return "int";
+ case DataType::Bool:
+ return "bool";
+ default:
+ assert(false);
+ }
+}
+
+inline int32_t width_to_cl_vector_size(int32_t width)
+{
+ switch(width)
+ {
+ case 1:
+ return 1;
+ case 2:
+ return 2;
+ case 3:
+ return 3;
+ case 4:
+ return 4;
+ case 5:
+ case 6:
+ case 7:
+ case 8:
+ return 8;
+ case 9:
+ case 10:
+ case 11:
+ case 12:
+ case 13:
+ case 14:
+ case 15:
+ case 16:
+ return 16;
+ default:
+ assert(false);
+ }
+}
+
+inline std::string get_cl_data_type(DataType dt, int32_t width)
+{
+ std::string data_type;
+ int32_t w = width_to_cl_vector_size(width);
+ data_type += data_type_to_cl_type(dt);
+ if(w != 1)
+ {
+ data_type += std::to_string(w);
+ }
+ return data_type;
+}
+
+inline std::string to_opencl_store(int32_t vector_length)
+{
+ if(vector_length != 1)
+ {
+ return "vstore" + std::to_string(vector_length) + "(";
+ }
+ else
+ {
+ return "*(";
+ }
+}
+
+struct TileInfo
+{
+ TileInfo() {}
+ TileInfo(DataType dt) : dt(dt), w(1), h(1) {}
+ TileInfo(DataType dt, int32_t width) : dt(dt), w(width), h(1) {}
+ TileInfo(DataType dt, int32_t width, int32_t height) : dt(dt), w(width), h(height) {}
+ DataType dt{ DataType::Unknown }; // Data type of the tile
+ int32_t w{ 0 }; // Width (i.e. c0 - portion of the channels)
+ int32_t h{ 0 }; // Height (i.e. s0 - portion of the spatial dimensions)
+};
+
+inline std::ostream& operator << (std::ostream& o, const TileInfo& a)
+{
+ o << a.w << " x " << a.h;
+ return o;
+}
+
+struct DataTypeAsString
+{
+ std::string str { "" };
+ DataType dt { DataType::Unknown };
+ int32_t size { 1 };
+};
+
+struct ValueAsString
+{
+ std::string str { "" };
+ DataTypeAsString type { };
+};
+
+// https://stackoverflow.com/questions/51515378/storing-and-accessing-tile-properties-in-c
+// A Tile is a collection of variables used to express a 2D data.
+class IScalarTile
+{
+public:
+ virtual ~IScalarTile() = default;
+ /** Method to get the scalar variable from a tile
+ * @param[in] x X coordinate on the width of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge
+ * @param[in] y Y coordinate on the height of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge
+ *
+ * @return the scalar variable as a string
+ */
+ virtual ValueAsString scalar(int32_t x, int32_t y) const = 0;
+ /** Method to get the list of underlying variable names used by the tile
+ *
+ * @return the list of variable names
+ */
+ virtual std::vector<ValueAsString> underlying_source_variables() const = 0;
+ /** Method to get the name of the tile.
+ *
+ * @return the name of the tile
+ */
+ std::string name() const
+ {
+ return _basename;
+ }
+ /** Method to get the tile format
+ *
+ * @return the format
+ */
+ TileInfo format() const
+ {
+ return _format;
+ }
+ /** Method to know whether the tile is assignable or not (constant)
+ *
+ * @return true if the tile is assignable
+ */
+ virtual bool is_assignable() const = 0;
+ /** Method to know whether the tile needs to be declared
+ *
+ * @return true if the tile needs to be declared in the code before being used
+ */
+ virtual bool need_declaration() const = 0;
+protected:
+ TileInfo _format { }; // Tile format
+ std::string _basename { "" }; // Tile name
+};
+
+// A tile is a collection of variables used to express a 2D data. The variables are vectors in the GPU context.
+// The vector size is given by the width of the tile. The number of vectors height by depth defines the number of vectors
+class IVectorTile : public IScalarTile
+{
+public:
+ virtual ~IVectorTile() = default;
+ /** Method to get the vector variable from a tile. A vector is an ordered homogeneous collection of two or more scalars.
+ * The user can query the list of supported width for the vectors through preferred_vector_sizes().
+ *
+ * @param[in] y Y coordinate on the height of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge
+ *
+ * @return the vector variable as a string
+ */
+ virtual ValueAsString vector(int32_t y) const = 0;
+ /** Method to get a vector variable from a tile. A vector is an ordered homogeneous collection of two or more scalars.
+ *
+ * @return the vector variable as a string
+ */
+ virtual ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const = 0;
+ /** Method to get the preferred vector sizes.
+ *
+ * @return a vector with the preferred vector sizes
+ */
+ //virtual std::vector<int32_t> preferred_vector_sizes() const = 0;
+};
+
+class ClTile : public IVectorTile
+{
+public:
+ ClTile(const std::string& name, TileInfo format)
+ {
+ _format = format;
+ _basename = name;
+ }
+
+ ValueAsString scalar(int32_t x, int32_t y) const override
+ {
+ x = std::max(std::min(x, _format.w - 1), static_cast<int32_t>(0));
+ y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
+
+ ValueAsString t;
+ t.str = build_variable_name(y);
+ t.type.str = get_cl_data_type(_format.dt, 1);
+ t.type.dt = _format.dt;
+ t.type.size = 1;
+
+ // Check required because if the width has only one element, we cannot use .s0
+ if(_format.w != 1)
+ {
+ // Automatic broadcasting
+ t.str += ".s" + std::to_string(x);
+ }
+
+ return t;
+ }
+
+ ValueAsString vector(int32_t y) const override
+ {
+ y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
+
+ ValueAsString t;
+ t.str = build_variable_name(y);
+ t.type.str = get_cl_data_type(_format.dt, _format.w);
+ t.type.dt = _format.dt;
+ t.type.size = _format.w;
+ return t;
+ }
+
+ ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const override
+ {
+ y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
+
+ ValueAsString t;
+ t.str = build_variable_name(y);
+ t.type.str = get_cl_data_type(_format.dt, width);
+ t.type.dt = _format.dt;
+ t.type.size = width;
+
+ if(_format.w != 1)
+ {
+ t.str += ".s";
+ for(int i = 0; i < width; ++i)
+ {
+ t.str += to_scalar_hex(x_start + i);
+ }
+ }
+ return t;
+ }
+
+ std::vector<ValueAsString> underlying_source_variables() const override
+ {
+ std::vector<ValueAsString> vars;
+ for(int32_t y = 0; y < _format.h; ++y)
+ {
+ ValueAsString t;
+ t.str = build_variable_name(y);
+ t.type.str = get_cl_data_type(_format.dt, _format.w);
+ t.type.dt = _format.dt;
+ t.type.size = _format.w;
+ vars.push_back(t);
+ }
+ return vars;
+ }
+
+ bool is_assignable() const override
+ {
+ return true;
+ }
+
+ bool need_declaration() const override
+ {
+ return true;
+ }
+
+private:
+ std::string build_variable_name(int32_t y) const
+ {
+ std::string var_name = _basename;
+
+ if(_format.h == 1)
+ {
+ return var_name;
+
+ }
+ else
+ {
+ var_name += "_";
+ var_name += std::to_string(y);
+ }
+
+ return var_name;
+ }
+
+ std::string to_scalar_hex(int32_t x) const
+ {
+ switch(x)
+ {
+ case 0:
+ case 1:
+ case 2:
+ case 3:
+ case 4:
+ case 5:
+ case 6:
+ case 7:
+ case 8:
+ case 9:
+ return std::to_string(x);
+ case 10:
+ return "A";
+ case 11:
+ return "B";
+ case 12:
+ return "C";
+ case 13:
+ return "D";
+ case 14:
+ return "E";
+ case 15:
+ return "F";
+ default:
+ std::cout << "Unsupported hexadecimal value" << std::endl;
+ assert(false);
+ }
+ }
+};
+
+// Unique features: It contains values in the form of string. The name used for this object is misleading since the variables can change the value over time.
+class ClConstantTile : public IVectorTile
+{
+public:
+ ClConstantTile(const std::vector<std::vector<std::string>> &in, DataType dt)
+ {
+ _format.w = in[0].size();
+ _format.h = in.size();
+ _format.dt = dt;
+
+ _data = std::vector<std::vector<std::string>>(_format.h, std::vector<std::string>(_format.w));
+
+ for(int32_t y = 0; y < _format.h; ++y)
+ {
+ for(int32_t x = 0; x < _format.w; ++x)
+ {
+ _data[y][x] = in[y][x];
+ }
+ }
+ }
+
+ ValueAsString scalar(int32_t x, int32_t y) const override
+ {
+ x = std::max(std::min(x, _format.w - 1), static_cast<int32_t>(0));
+ y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
+
+ ValueAsString t;
+ t.str = _data[y][x];
+ t.type.str = get_cl_data_type(_format.dt, 1);
+ t.type.dt = _format.dt;
+ t.type.size = 1;
+
+ return t;
+ }
+
+ ValueAsString vector(int32_t y) const override
+ {
+ y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
+
+ return vector(0, _format.w, y);
+ }
+
+ ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const override
+ {
+ y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
+
+ ValueAsString t;
+ t.str = "";
+ t.type.str = get_cl_data_type(_format.dt, width);
+ t.type.dt = _format.dt;
+ t.type.size = width;
+
+ if(width > 1)
+ {
+ t.str += "((" + get_cl_data_type(_format.dt, width) + ")(";
+ }
+
+ int32_t x = x_start;
+ for(; x < width - 1; ++x)
+ {
+ t.str += scalar(x, y).str;
+ t.str += ", ";
+ }
+ t.str += scalar(x, y).str;
+
+ if(width > 1)
+ {
+ t.str += "))";
+ }
+
+ return t;
+ }
+
+ std::vector<ValueAsString> underlying_source_variables() const override
+ {
+ std::vector<ValueAsString> vars;
+
+ for(int32_t y = 0; y < _format.h; ++y)
+ {
+ for(int32_t x = 0; x < _format.w; ++x)
+ {
+ ValueAsString t;
+ t.str = _data[y][x];
+ t.type.str = get_cl_data_type(_format.dt, 1);
+ t.type.dt = _format.dt;
+ t.type.size = 1;
+ vars.push_back(t);
+ }
+ }
+
+ return vars;
+ }
+
+ bool is_assignable() const override
+ {
+ return false;
+ }
+
+ bool need_declaration() const override
+ {
+ return false;
+ }
+
+private:
+ std::vector<std::vector<std::string>> _data{};
+};
+
+enum class TensorComponentIndex : int32_t
+{
+ IndexMask = 0x0000000f,
+};
+
+enum class TensorComponentType : int32_t
+{
+ OffsetFirstElement = 0x00000100,
+ Stride = 0x00001000,
+ Dimension = 0x00010000,
+ FoldedDimension = 0x00100000,
+ Constant = 0x01000000
+};
+
+enum class TensorComponent : int32_t
+{
+ Unknown = 0x00000000,
+ OffsetFirstElement = 0x00000100,
+ Stride1 = 0x00001001,
+ Stride2 = 0x00001002,
+ Stride3 = 0x00001003,
+ Stride4 = 0x00001004,
+ Dim0 = 0x00010000,
+ Dim1 = 0x00010001,
+ Dim2 = 0x00010002,
+ Dim3 = 0x00010003,
+ Dim4 = 0x00010004,
+ C = 0x00010000, // Dim0
+ W = 0x00010001, // Dim1
+ H = 0x00010002, // Dim2
+ D = 0x00010003,
+ N = 0x00010004,
+ Dim1xDim2 = 0x00100021,
+ Dim1xDim2xDim3 = 0x00100321,
+ WxH = 0x00100021,
+ WxHxD = 0x00100321
+};
+
+inline std::string to_string(TensorComponent x)
+{
+ switch(x)
+ {
+ case TensorComponent::Unknown:
+ return "Unknown";
+ case TensorComponent::OffsetFirstElement:
+ return "OffsetFirstElement";
+ case TensorComponent::Stride1:
+ return "Stride1";
+ case TensorComponent::Stride2:
+ return "Stride2";
+ case TensorComponent::Stride3:
+ return "Stride3";
+ case TensorComponent::Stride4:
+ return "Stride4";
+ case TensorComponent::Dim0:
+ return "Dim0";
+ case TensorComponent::Dim1:
+ return "Dim1";
+ case TensorComponent::Dim2:
+ return "Dim2";
+ case TensorComponent::Dim3:
+ return "Dim3";
+ case TensorComponent::Dim4:
+ return "Dim4";
+ case TensorComponent::Dim1xDim2:
+ return "Dim1xDim2";
+ case TensorComponent::Dim1xDim2xDim3:
+ return "Dim1xDim2xDim3";
+ default:
+ assert(false);
+ }
+}
+
+class ITensorArgument
+{
+public:
+ virtual ~ITensorArgument() = default;
+ /** Method to get the tensor component as a string
+ *
+ * @param[in] x tensor component to query
+ *
+ * @return the tensor component as a string
+ */
+ virtual std::string component(TensorComponent x) = 0;
+ /** Method to get the tensor component type declaration as a string
+ *
+ * @return the tensor component type declaration as a string
+ */
+ virtual std::string component_type_declaration() const = 0;
+ /** Method to get the tensor component data type
+ *
+ * @return the tensor component data type
+ */
+ virtual DataType component_data_type() const = 0;
+ /** Method to get the tensor component declarations
+ *
+ * @return a vector containing the tensor component declarations
+ */
+ virtual std::vector<TensorComponent> component_declarations() const = 0;
+ /** Method to get the name of the tensor argument.
+ *
+ * @return the name of the tensor argument
+ */
+ std::string name() const
+ {
+ return _basename;
+ }
+ /** Method to get the tensor format
+ *
+ * @return the format
+ */
+ TensorInfo format() const
+ {
+ return _format;
+ }
+
+protected:
+ TensorInfo _format { };
+ std::string _basename {};
+};
+
+enum class GpuTensorStorage : int32_t
+{
+ Unknown = 0x0000,
+ BufferUint8Ptr = 0x0012,
+ Image2dReadOnly = 0x0020,
+ Image2dWriteOnly = 0x0021,
+ Image3dReadOnly = 0x0030,
+ Image3dWriteOnly = 0x0031
+};
+
+class IGpuTensorArgument : public ITensorArgument
+{
+public:
+ virtual ~IGpuTensorArgument() = default;
+ /** Method to get the tensor storage, which is the underlying storage used to keep the data memory
+ *
+ * @param[in] x tensor storage to query
+ *
+ * @return the tensor storage as a string
+ */
+ virtual std::string storage(GpuTensorStorage x) = 0;
+ /** Method to get the tensor storage type declaration as a string
+ *
+ * @param[in] x tensor component to query
+ *
+ * @return the tensor storage type declaration as a string
+ */
+ virtual std::string storage_type_declaration(GpuTensorStorage x) const = 0;
+ /** Method to get the tensor storage declarations
+ *
+ * @return a vector containing the tensor storage declarations
+ */
+ virtual std::vector<GpuTensorStorage> storage_declarations() const = 0;
+};
+
+class ClTensorArgument : public IGpuTensorArgument
+{
+public:
+ ClTensorArgument(const std::string& name, const TensorInfo& x, bool return_by_value_when_possible)
+ {
+ _basename = name;
+ _format = x;
+ _return_by_value_when_possible = return_by_value_when_possible;
+ }
+
+ // Methods to override
+ std::string component(TensorComponent x) override
+ {
+ if((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentType::Constant)))
+ {
+ int32_t idx = static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentIndex::IndexMask);
+ return std::to_string(idx - 1);
+ }
+
+ if(_return_by_value_when_possible)
+ {
+ if((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentType::Dimension)))
+ {
+ int32_t idx = static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentIndex::IndexMask);
+ return std::to_string(_format.shape[idx]);
+ }
+
+ if((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentType::FoldedDimension)))
+ {
+ switch(x)
+ {
+ case TensorComponent::Dim1xDim2:
+ return std::to_string(_format.shape[1] * _format.shape[2]);
+ case TensorComponent::Dim1xDim2xDim3:
+ return std::to_string(_format.shape[1] * _format.shape[2] * _format.shape[2]);
+ default:
+ std::cout << "Unsupported folded dimension" << std::endl;
+ assert(false);
+ }
+ }
+ }
+
+ if(std::find(_components_required.begin(), _components_required.end(), x) == _components_required.end())
+ {
+ _components_required.push_back(x);
+ }
+
+ return build_component_name(x);
+ }
+
+ std::string component_type_declaration() const override
+ {
+ return "int";
+ };
+
+ DataType component_data_type() const override
+ {
+ return DataType::Int32;
+ }
+
+ std::string storage(GpuTensorStorage x) override
+ {
+ if(std::find(_storage_required.begin(), _storage_required.end(), x) == _storage_required.end())
+ {
+ _storage_required.push_back(x);
+ }
+
+ return build_storage_name(x);
+ }
+
+ std::string storage_type_declaration(GpuTensorStorage x) const override
+ {
+ switch(x)
+ {
+ case GpuTensorStorage::BufferUint8Ptr:
+ return "__global uchar*";
+ case GpuTensorStorage::Image2dReadOnly:
+ return "__read_only image2d_t";
+ case GpuTensorStorage::Image2dWriteOnly:
+ return "__write_only image2d_t";
+ case GpuTensorStorage::Image3dReadOnly:
+ return "__read_only image3d_t ";
+ case GpuTensorStorage::Image3dWriteOnly:
+ return "__write_only image3d_t ";
+ default:
+ std::cout << "Unsupported storage" << std::endl;
+ assert(false);
+ }
+ };
+
+ std::vector<GpuTensorStorage> storage_declarations() const override
+ {
+ return _storage_required;
+ }
+
+ std::vector<TensorComponent> component_declarations() const override
+ {
+ return _components_required;
+ }
+
+private:
+ std::string build_storage_name(GpuTensorStorage x) const
+ {
+ std::string var_name = _basename;
+
+ switch(x)
+ {
+ case GpuTensorStorage::BufferUint8Ptr:
+ return var_name + "_ptr";
+ case GpuTensorStorage::Image2dReadOnly:
+ case GpuTensorStorage::Image2dWriteOnly:
+ return var_name + "_img2d";
+ case GpuTensorStorage::Image3dReadOnly:
+ case GpuTensorStorage::Image3dWriteOnly:
+ return var_name + "_img3d";
+ default:
+ std::cout << "Unsupported storage" << std::endl;
+ assert(false);
+ }
+
+ return var_name;
+ }
+
+ std::string build_component_name(TensorComponent x) const
+ {
+ std::string var_name = _basename;
+
+ switch(x)
+ {
+ case TensorComponent::OffsetFirstElement:
+ return var_name + "_offset_first_element";
+ case TensorComponent::Stride1:
+ return var_name + "_stride1";
+ case TensorComponent::Stride2:
+ return var_name + "_stride2";
+ case TensorComponent::Stride3:
+ return var_name + "_stride3";
+ case TensorComponent::Dim0:
+ return var_name + "_dim0";
+ case TensorComponent::Dim1:
+ return var_name + "_dim1";
+ case TensorComponent::Dim2:
+ return var_name + "_dim2";
+ case TensorComponent::Dim3:
+ return var_name + "_dim3";
+ case TensorComponent::Dim1xDim2:
+ return var_name + "_dim1xdim2";
+ case TensorComponent::Dim1xDim2xDim3:
+ return var_name + "_dim1xdim2xdim3";
+ default:
+ std::cout << "Unsupported component" << std::endl;
+ assert(false);
+ }
+
+ return var_name;
+ }
+
+ bool _return_by_value_when_possible { false };
+ std::vector<GpuTensorStorage> _storage_required {};
+ std::vector<TensorComponent> _components_required {};
+};
+
+/**
+ * @brief Data structure that contains the declared tiles by the components.
+ * The registry is a linear data structure that follows the similar principle of the stack. The user can use the @p increment_registry_level() method to
+ * increase the level of the stack (0 when it starts). When the user uses the @p decrement_registry_level() method, the registry decreases the level of the stack
+ * and remove (pop) all the tiles from the level above.
+ * When a tile is declared on the level 0, it is a global tile. A global tile is visible in all parts of the code.
+ * Since different components may use the same name to define a tile, the registry adopts the IdSpace concept, an @p id to prevent name collisions
+ * when declaring tiles among different components.
+ *
+ */
+class GpuTileRegistry
+{
+public:
+enum class RegistryTileType
+{
+ Tile,
+ Link
+};
+
+using RegistryIdSpace = int32_t;
+using RegistryLevel = int32_t;
+using RegistryTileName = std::string;
+
+struct RegistryTileTableEntry
+{
+ RegistryLevel registry_level { 0 };
+ std::unique_ptr<IVectorTile> tile_object { nullptr };
+};
+
+struct RegistryTileTypeTableEntry
+{
+ RegistryTileType tile_type { RegistryTileType::Tile };
+ RegistryTileName tile_name {};
+ RegistryIdSpace registry_idspace { 0 };
+ RegistryLevel registry_level { 0 };
+};
+
+using RegistryTileTable = std::map<RegistryIdSpace, std::map<RegistryTileName, RegistryTileTableEntry>>;
+using RegistryTileTypeTable = std::map<RegistryIdSpace, std::map<RegistryTileName, RegistryTileTypeTableEntry>>;
+ /**
+ * @brief Construct a new Gpu Tile Registry object
+ *
+ */
+ GpuTileRegistry()
+ {
+ _language = GpuTargetLanguage::Unknown;
+ }
+ /**
+ * @brief Construct a new Gpu Tile Registry object providing the Gpu programming language
+ *
+ * @param[in] language Gpu programming language to use
+ */
+ GpuTileRegistry(GpuTargetLanguage language)
+ {
+ _language = language;
+ }
+ /**
+ * @brief Default destructor. Destroy the Gpu Tile Registry object
+ *
+ */
+ ~GpuTileRegistry() = default;
+ /**
+ * @brief Set the working IdSpace for the tile registry. IdSpace is used to prevent name collisions when declaring tiles.
+ * Therefore, the IdSpace should be set before declaring any tiles.
+ *
+ * @param[in] id The IdSpace id
+ */
+ void set_IdSpace(int32_t id)
+ {
+ _IdSpace = id;
+ }
+ /**
+ * @brief Get the current working IdSpace for the tile registry. IdSpace is used to prevent name collisions when declaring tiles
+ *
+ * @return The IdSpace id
+ */
+ int32_t IdSpace() const
+ {
+ return _IdSpace;
+ }
+ /**
+ * @brief Gets all the IdSpace declarations defined in the tile registry.
+ *
+ * @return all the IdSpace declarations defined in the tile registry as std::vector<int32_t>. It returns an empty vector if there are no IdSpace declarations.
+ */
+ std::vector<int32_t> IdSpace_declarations() const
+ {
+ std::vector<int32_t> x;
+
+ auto it = _frags.begin();
+
+ while (it != _frags.end())
+ {
+ x.push_back(it->first);
+
+ it++;
+ }
+
+ return x;
+ }
+ /**
+ * @brief Declare a tile from a previously created tile
+ */
+ void insert(const std::string& name, const IVectorTile *frag)
+ {
+ assert(_language == GpuTargetLanguage::OpenCL);
+ const int32_t key_IdSpace = _IdSpace;
+ const std::string key_var_name = name;
+ const std::string var_name = frag->name();
+ TileInfo format = frag->format();
+
+ // First check whether a tile with the same name exists
+ IVectorTile *result = (*this)[key_var_name];
+ assert(result == nullptr);
+ if(result == nullptr)
+ {
+ std::unique_ptr<ClTile> tile = std::make_unique<ClTile>(var_name, format);
+
+ _frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
+ _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
+
+ _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Link;
+ _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name;
+ _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
+ _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level;
+ }
+ }
+ /**
+ * @brief Declare a tile with TileInfo. The tile will be stored in the IdSpace set with @p set_IdSpace()
+ *
+ * @note The reference name used for declaring the tile should not be previously used in the IdSpace
+ *
+ * @param[in] name Reference name for the tile. The reference name can be used to retrieve the tile stored in the registry.
+ * @param[in] format Tile format use to use
+ */
+ void insert(const std::string& name, const TileInfo& format)
+ {
+ assert(_language == GpuTargetLanguage::OpenCL);
+ const int32_t key_IdSpace = _IdSpace;
+ const std::string key_var_name = name;
+ const std::string var_name = generate_tile_name(name);
+
+ // First check whether a tile with the same name exists
+ IVectorTile *result = (*this)[key_var_name];
+ assert(result == nullptr);
+ if(result == nullptr)
+ {
+ std::unique_ptr<ClTile> tile = std::make_unique<ClTile>(var_name, format);
+ _frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
+ _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
+
+ _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile;
+ _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name;
+ _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
+ _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level;
+ }
+ }
+ /**
+ * @brief Declare a constant tile. The content of the tile is passed as a vector of std::string
+ *
+ * @note The reference name used for declaring the tile should not be previously used in the IdSpace
+ *
+ * @param[in] name Reference name for the tile. The reference name can be used to retrieve the tile stored in the registry.
+ * @param[in] in A 3D std::vector of std::string. From the 3D std::vector we can know the dimensions for the tile
+ * @param[in] dt The data type for the elements stored in the 3D std::vector as std::string. It is user's responsibilty to ensure
+ * that the data type is aligned with the content of the std::string.
+ */
+ void insert(const std::string& name, const std::vector<std::vector<std::string>>& in, DataType dt)
+ {
+ assert(_language == GpuTargetLanguage::OpenCL);
+ const int32_t key_IdSpace = _IdSpace;
+ const std::string key_var_name = name;
+
+ // First check whether a tile with the same name exists
+ IVectorTile *result = (*this)[key_var_name];
+ assert(result == nullptr);
+ if(result == nullptr)
+ {
+ std::unique_ptr<ClConstantTile> tile = std::make_unique<ClConstantTile>(in, dt);
+ _frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
+ _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
+
+ _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile;
+ _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name;
+ _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
+ _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level;
+ }
+ }
+ /**
+ * @brief Declare an anonymous constant tile. The content of the tile is passed as a vector of std::string
+ *
+ * @note This method can be used to declare temporary tiles that need to be accessed only once.
+ *
+ * @param[in] in A 3D std::vector of std::string. From the 3D std::vector we can know the dimensions for the tile
+ * @param[in] dt The data type for the elements stored in the 3D std::vector as std::string. It is user responsibilty to ensure
+ * that the data type is aligned with what passed with the std::string.
+ *
+ * @return IVectorTile* the anonymous constant tile
+ */
+ IVectorTile* insert(const std::vector<std::vector<std::string>>& in, DataType dt)
+ {
+ assert(_language == GpuTargetLanguage::OpenCL);
+ const int32_t key_IdSpace = _IdSpace;
+ const std::string key_var_name = "_" + std::to_string(_anonymous_frag_count++);
+
+ // First check whether a tile with the same name exists
+ IVectorTile *result = (*this)[key_var_name];
+ assert(result == nullptr);
+ if(result == nullptr)
+ {
+ std::unique_ptr<ClConstantTile> tile = std::make_unique<ClConstantTile>(in, dt);
+ _frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
+ _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
+
+ _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile;
+ _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name;
+ _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
+ _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level;
+ }
+
+ return (*this)[key_var_name];
+ }
+ /**
+ * @brief Get the tile from the registry. This method searches the tile in the IdSpace provided by the user
+ *
+ * @param[in] name The name of the tile to retrieve
+ * @param[in] IdSpace The IdSpace id where to search the tile
+ *
+ * @return IVectorTile* The tile
+ */
+ IVectorTile* get(const std::string& name, int32_t IdSpace)
+ {
+ const int32_t key_IdSpace = IdSpace;
+ const std::string key_var_name = name;
+
+ IVectorTile* result = nullptr;
+ auto search_IdSpace = _frags.find(key_IdSpace);
+ if(search_IdSpace != _frags.end())
+ {
+ auto search_tile = _frags[key_IdSpace].find(key_var_name);
+ if(search_tile != _frags[key_IdSpace].end())
+ {
+ result = search_tile->second.tile_object.get();
+ assert(result != nullptr);
+ }
+ }
+
+ return result;
+ }
+ /**
+ * @brief Get the tile from the registry. This method searches the tile in the IdSpace set with @p set_IdSpace()
+ *
+ * @param[in] name The name of the tile to retrieve
+ *
+ * @return IVectorTile* The tile
+ */
+ IVectorTile* operator[](const std::string& name)
+ {
+ return get(name, _IdSpace);
+ }
+ /**
+ * @brief Check whether the tile in the in the IdSpace provided by the user exists
+ *
+ * @param[in] name Name of the tile to search for
+ * @param[in] IdSpace The IdSpace id where to search the tile
+ *
+ * @return true if the tile exists
+ * @return false if the tile does not exist
+ */
+ bool has_tile(const std::string& name, int32_t IdSpace) const
+ {
+ const int32_t key_IdSpace = IdSpace;
+ const std::string key_var_name = name;
+
+ // IVectorTile* result = nullptr;
+ auto search_IdSpace = _frags.find(key_IdSpace);
+
+ return search_IdSpace != _frags.end();
+ }
+ /**
+ * @brief Check whether the tile within the current IdSpace exists
+ *
+ * @param[in] name Name of the tile to search for
+ *
+ * @return true if the tile exists
+ * @return false if the tile does not exist
+ */
+ bool has_tile(const std::string& name) const
+ {
+ return has_tile(name, _IdSpace);
+ }
+ /**
+ * @brief Get all the tiles declared within the IdSpace provided by the user
+ *
+ * @param[in] IdSpace IdSpace where to retrieve all the declared tiles
+ *
+ * @return std::vector<IVectorTile*> A vector with all the declared tiles in the IdSpace provided by the user
+ */
+ std::vector<IVectorTile*> tile_declarations(int32_t IdSpace)
+ {
+ std::vector<IVectorTile*> tiles;
+
+ std::map<RegistryTileName, RegistryTileTypeTableEntry>::iterator it = _frag_types[IdSpace].begin();
+
+ while (it != _frag_types[IdSpace].end())
+ {
+ // The following line should be enabled. However, we cannot at this stage
+ // because it used to retrieve the output tile produced by each component.
+ // However, this method should NOT be used to retrieve the output tile
+ //if(it->second.tile_type == RegistryTileType::Tile)
+ {
+ tiles.push_back(get(it->second.tile_name, it->second.registry_idspace));
+ }
+ it++;
+ }
+
+ return tiles;
+ }
+ /**
+ * @brief Increase the level of stack.
+ *
+ */
+ void increment_registry_level()
+ {
+ _registry_level++;
+ }
+ /**
+ * @brief Remove all the tiles declared at the current stack level and decrease the level of the stack.
+ *
+ */
+ void decrement_registry_level()
+ {
+ assert(_registry_level >= 0);
+
+ // Remove all variables in the local scope
+ std::map<RegistryTileName, RegistryTileTableEntry>::iterator it = _frags[_IdSpace].begin();
+
+ while (it != _frags[_IdSpace].end())
+ {
+ if (it->second.registry_level == _registry_level)
+ {
+ it = _frags[_IdSpace].erase(it);
+ }
+ else
+ {
+ it++;
+ }
+ }
+
+ std::map<RegistryTileName, RegistryTileTypeTableEntry>::iterator it_type = _frag_types[_IdSpace].begin();
+
+ while (it_type != _frag_types[_IdSpace].end())
+ {
+ if (it_type->second.registry_level == _registry_level)
+ {
+ it_type = _frag_types[_IdSpace].erase(it_type);
+ }
+ else
+ {
+ it_type++;
+ }
+ }
+
+ _registry_level--;
+ }
+ /**
+ * @brief Get the level of the stack
+ *
+ */
+ int32_t level() const
+ {
+ return _registry_level;
+ }
+
+private:
+ // This method ensures that the key is unique among different components
+ std::string generate_tile_name(const std::string& name)
+ {
+ assert(_IdSpace >= 0 );
+ if(_registry_level == 0)
+ {
+ return "_G" + std::to_string(_IdSpace) + "_" + name;
+ }
+ else
+ {
+ return name;
+ }
+ }
+ RegistryTileTable _frags {};
+ RegistryTileTypeTable _frag_types {};
+ RegistryLevel _registry_level { 0 };
+ RegistryIdSpace _IdSpace { -1 };
+ int32_t _anonymous_frag_count { 0 }; // Counter used to create the anonymous tiles
+ GpuTargetLanguage _language { GpuTargetLanguage::Unknown }; // Gpu programming language
+};
+
+using TensorEntry = std::unique_ptr<IGpuTensorArgument>;
+
+/**
+ * @brief Data structure that contains the tensors consumed by the components.
+ * Since different components may use the same name as reference for a tensor, the registry adopts the IdSpace concept, an @p id to prevent name collisions
+ * when declaring tensors among different components.
+ *
+ */
+class GpuTensorArgumentRegistry
+{
+public:
+ /**
+ * @brief Construct a new Gpu Tensor Registry object
+ *
+ */
+ GpuTensorArgumentRegistry()
+ {
+ _language = GpuTargetLanguage::Unknown;
+ }
+ /**
+ * @brief Construct a new Gpu Tensor Registry object
+ *
+ * @param[in] language Gpu programming language to use
+ */
+ GpuTensorArgumentRegistry(GpuTargetLanguage language)
+ {
+ _language = language;
+ }
+ /**
+ * @brief Default destructor. Destroy the Gpu Tensor Registry object
+ *
+ */
+ ~GpuTensorArgumentRegistry() = default;
+ /**
+ * @brief Set the working IdSpace for the tensor registry. IdSpace is used to prevent name collisions when declaring tensors.
+ * Therefore, the IdSpace should be set before declaring any tensors.
+ *
+ * @param[in] id The IdSpace id
+ */
+ void set_IdSpace(int32_t id)
+ {
+ _IdSpace = id;
+ }
+ /**
+ * @brief Get the current working IdSpace for the tensor registry. IdSpace is used to prevent name collisions when declaring tensors
+ *
+ * @return The IdSpace id
+ */
+ int32_t IdSpace() const
+ {
+ return _IdSpace;
+ }
+ /**
+ * @brief Gets all the IdSpace declarations defined in the tensor registry.
+ *
+ * @return all the IdSpace declarations defined in the tensor registry as std::vector<int32_t>. It returns an empty vector if there are no IdSpace declarations.
+ */
+ std::vector<int32_t> IdSpace_declarations() const
+ {
+ std::vector<int32_t> x;
+
+ auto it = _refs.begin();
+
+ while (it != _refs.end())
+ {
+ x.push_back(it->first);
+
+ it++;
+ }
+
+ return x;
+ }
+ /**
+ * @brief Declare a tensor with TensorInfo. The tensor will be stored in the IdSpace set with @p set_IdSpace()
+ *
+ * @note The reference name used for declaring the tensor should not be previously used in the IdSpace
+ *
+ * @param[in] name Reference name for the tensor. The reference name can be used to retrieve the tensor stored in the registry.
+ * @param[in] x Pair of tensor info and tensor id
+ * @param[in] return_by_value_when_possible True if we want the value stored in the tensor components
+ */
+ void insert(const std::string& name, const TensorInfo& x, bool return_by_value_when_possible)
+ {
+ assert(_language == GpuTargetLanguage::OpenCL);
+ const int32_t key_IdSpace = _IdSpace;
+ const int32_t tensor_id = x.id;
+ const std::string key_var_name = name;
+ const std::string var_name = generate_tensor_name(name, tensor_id);
+
+ // First, check whether the tensor has already a reference. If so, trigger an assert
+ assert(!has_tensor_argument(name));
+
+ // Check whether a tensor with that tensorID exists
+ auto result = _tensor_arguments.find(tensor_id);
+ if(result == _tensor_arguments.end())
+ {
+ // It means that we haven't added a tensor with that tensor_id yet. Create a IGpuTensorArgument before creating the reference
+ std::unique_ptr<ClTensorArgument> arg = std::make_unique<ClTensorArgument>(var_name, x, return_by_value_when_possible);
+ _tensor_arguments[tensor_id] = std::move(arg);
+ }
+
+ _refs[key_IdSpace][key_var_name] = tensor_id;
+ }
+ /**
+ * @brief Get the tensor from the registry. This method searches the tensor in the IdSpace set with @p set_IdSpace()
+ *
+ * @param[in] name The name of the tensor to retrieve
+ *
+ * @return IGpuTensor* The tensor
+ */
+ IGpuTensorArgument* operator[](const std::string& name)
+ {
+ const int32_t key_IdSpace = _IdSpace;
+ const std::string key_var_name = name;
+
+ IGpuTensorArgument* result = nullptr;
+ auto search_IdSpace = _refs.find(key_IdSpace);
+ if(search_IdSpace != _refs.end())
+ {
+ auto search_tensor_id = _refs[key_IdSpace].find(key_var_name);
+
+ if(search_tensor_id != _refs[key_IdSpace].end())
+ {
+ const int32_t tensor_id = search_tensor_id->second;
+ auto search_tensor_argument = _tensor_arguments.find(tensor_id);
+ if(search_tensor_argument != _tensor_arguments.end())
+ {
+ result = search_tensor_argument->second.get();
+ }
+ assert(result != nullptr);
+ }
+ }
+
+ return result;
+ }
+ /**
+ * @brief Get all the tensors declared in the IdSpace provided by the user
+ *
+ * @return std::vector<IGpuTensorArgument*> A vector with all the declared tensors
+ */
+ std::vector<IGpuTensorArgument*> tensor_argument_declarations()
+ {
+ std::vector<IGpuTensorArgument*> args;
+
+ auto it = _tensor_arguments.begin();
+
+ while (it != _tensor_arguments.end())
+ {
+ args.push_back(it->second.get());
+ it++;
+ }
+
+ return args;
+ }
+ /**
+ * @brief Check whether the tensor argument in the IdSpace set with @p set_IdSpace() exists
+ *
+ * @param[in] name Name of the tensor argument to search for
+ *
+ * @return true if the tensor argument exists
+ * @return false if the tensor argument does not exist
+ */
+ bool has_tensor_argument(const std::string& name)
+ {
+ const int32_t key_IdSpace = _IdSpace;
+ const std::string key_var_name = name;
+
+ auto search_IdSpace = _refs.find(key_IdSpace);
+
+ if(search_IdSpace != _refs.end())
+ {
+ auto search_tensor_id = _refs[key_IdSpace].find(key_var_name);
+
+ return search_tensor_id != _refs[key_IdSpace].end();
+ }
+ else
+ {
+ return false;
+ }
+ }
+ /**
+ * @brief Check whether the tensor argument is in the the IdSpace provided by the user
+ *
+ * @param[in] name Name of the tensor argument to search for
+ * @param[in] IdSpace The IdSpace id where to search the tensor argument
+ *
+ * @return true if the tile exists
+ * @return false if the tile does not exist
+ */
+ bool has_tensor_argument(const std::string& name, int32_t IdSpace)
+ {
+ const int32_t key_IdSpace = IdSpace;
+ const std::string key_var_name = name;
+
+ auto search_IdSpace = _refs.find(key_IdSpace);
+
+ if(search_IdSpace != _refs.end())
+ {
+ auto search_tensor_id = _refs[key_IdSpace].find(key_var_name);
+
+ return search_tensor_id != _refs[key_IdSpace].end();
+ }
+ else
+ {
+ return false;
+ }
+ }
+private:
+ // This method ensures that the key is unique among different components
+ std::string generate_tensor_name(const std::string& name, int32_t tensor_id)
+ {
+ assert(tensor_id >= 0 );
+
+ return name + std::to_string(tensor_id);
+ }
+
+ std::map<int32_t, TensorEntry> _tensor_arguments {};
+ std::map<int32_t, std::map<std::string, int32_t>> _refs {};
+ int32_t _IdSpace { -1 };
+ GpuTargetLanguage _language { GpuTargetLanguage::Unknown }; // Gpu programming language
+};
+
+enum class OpType : int32_t
+{
+ Elementwise = 0x0000,
+ Relational = 0x1000,
+ Algebra = 0x2000
+};
+
+inline std::string to_string(AssignmentOp op)
+{
+ switch(op)
+ {
+ case AssignmentOp::Decrement:
+ return "-=";
+ case AssignmentOp::Increment:
+ return "+=";
+
+ default:
+ assert(false);
+ }
+}
+
+inline std::string to_string(BinaryOp op)
+{
+ switch(op)
+ {
+ case BinaryOp::Add:
+ return "+";
+ case BinaryOp::Sub:
+ return "-";
+ case BinaryOp::Mul:
+ return "*";
+ case BinaryOp::Div:
+ return "/";
+ case BinaryOp::Mod:
+ return "%";
+ case BinaryOp::Equal:
+ return "==";
+ case BinaryOp::Less:
+ return "<";
+ case BinaryOp::LessEqual:
+ return "<=";
+ case BinaryOp::Greater:
+ return ">";
+ case BinaryOp::GreaterEqual:
+ return ">=";
+ case BinaryOp::LogicalAnd:
+ return "&&";
+ case BinaryOp::LogicalOr:
+ return "||";
+ case BinaryOp::LogicalNot:
+ return "!";
+ default:
+ assert(false);
+ }
+}
+
+inline std::string binary_op_string(BinaryOp op)
+{
+ switch(op)
+ {
+ case BinaryOp::Add:
+ return "add";
+ case BinaryOp::Sub:
+ return "sub";
+ case BinaryOp::Mul:
+ return "mul";
+ case BinaryOp::Div:
+ return "div";
+ case BinaryOp::Mod:
+ return "mod";
+ case BinaryOp::Equal:
+ return "eq";
+ case BinaryOp::Less:
+ return "gt";
+ case BinaryOp::LessEqual:
+ return "gteq";
+ case BinaryOp::Greater:
+ return "lt";
+ case BinaryOp::GreaterEqual:
+ return "lte";
+ default:
+ assert(false);
+ }
+}
+
+enum class OperandType : int32_t
+{
+ Unknown = 0x00000000,
+ ScalarFp32 = 0x00001011, // Immediate scalar tile
+ ScalarFp16 = 0x00001012, // Immediate scalar tile
+ ScalarInt32 = 0x00001021, // Immediate scalar tile
+ ScalarInt16 = 0x00001022, // Immediate scalar tile
+ ScalarInt8 = 0x00001024, // Immediate scalar tile
+ ScalarUInt32 = 0x00001031, // Immediate scalar tile
+ ScalarUInt16 = 0x00001032, // Immediate scalar tile
+ ScalarUInt8 = 0x00001034, // Immediate scalar tile
+ ScalarBool = 0x00001041, // Immediate scalar tile
+ ScalarTile = 0x00001050, // Scalar from a tile
+ Tile = 0x00010000, // Tile
+ TensorStride1 = 0x00100001, // Tensor component
+ TensorStride2 = 0x00100002, // Tensor component
+ TensorStride3 = 0x00100003, // Tensor component
+ TensorStride4 = 0x00100004, // Tensor component
+ TensorDim0 = 0x00100010, // Tensor component
+ TensorDim1 = 0x00100020, // Tensor component
+ TensorDim2 = 0x00100030, // Tensor component
+ TensorDim3 = 0x00100040, // Tensor component
+ TensorDim4 = 0x00100050, // Tensor component
+ TensorC = 0x00100010, // Tensor component
+ TensorW = 0x00100020, // Tensor component
+ TensorH = 0x00100030, // Tensor component
+ TensorD = 0x00100040, // Tensor component
+ TensorN = 0x00100050, // Tensor component
+ TensorDim1xDim2 = 0x00100100, // Tensor component
+ TensorDim1xDim2xDim3 = 0x00100200, // Tensor component
+ TensorWxH = 0x00100300, // Tensor component
+ TensorWxHxD = 0x00100400, // Tensor component
+ TensorDataOffset = 0x00100500, // Tensor component
+};
+
+struct ScalarTileCoord
+{
+ ScalarTileCoord() {}
+ ScalarTileCoord(int32_t x0, int32_t y0) : x(x0), y(y0) {}
+ int32_t x { -1 };
+ int32_t y { -1 };
+};
+/**
+ * @brief Operand class. This object is used to pass the operands to the operations performed by the writer.
+ * Operand can be of three types:
+ * -# Scalar immediate: constant expression
+ * -# Tile: A tile
+ * -# Tensor component: A component (scalar) of a tensor
+ *
+ */
+class Operand
+{
+public:
+ Operand(const std::string &val)
+ {
+ _str = val;
+ _type = OperandType::Tile;
+ }
+
+ Operand(const std::string &val, const ScalarTileCoord& coord)
+ {
+ _str = val;
+ _type = OperandType::ScalarTile;
+ _coord = coord;
+ }
+
+ Operand(const std::string &val, OperandType type)
+ {
+ _str = val;
+ _type = type;
+ }
+
+ Operand(const Operand& t)
+ {
+ _str = t.value();
+ _type = t.type();
+ }
+
+ Operand& operator=(const Operand& t)
+ {
+ _str = t.value();
+ _type = t.type();
+ _coord = t.scalar_tile_coordinate();
+ return *this;
+ }
+
+ std::string value() const
+ {
+ return _str;
+ }
+
+ OperandType type() const
+ {
+ return _type;
+ }
+
+ ScalarTileCoord scalar_tile_coordinate() const
+ {
+ return _coord;
+ }
+
+private:
+ std::string _str {};
+ OperandType _type { OperandType::Unknown };
+ ScalarTileCoord _coord {};
+};
+
+enum class GpuSamplerTensorStorage : int32_t
+{
+ Unknown = static_cast<int32_t>(GpuTensorStorage::Unknown),
+ BufferUint8Ptr = static_cast<int32_t>(GpuTensorStorage::BufferUint8Ptr),
+ Image2dReadOnly = static_cast<int32_t>(GpuTensorStorage::Image2dReadOnly),
+ Image2dWriteOnly = static_cast<int32_t>(GpuTensorStorage::Image2dWriteOnly),
+ Image3dReadOnly = static_cast<int32_t>(GpuTensorStorage::Image3dReadOnly),
+ Image3dWriteOnly = static_cast<int32_t>(GpuTensorStorage::Image2dWriteOnly),
+};
+
+struct GpuSampler
+{
+ GpuSampler() = default;
+ TensorSamplerFormat format { TensorSamplerFormat::Unknown };
+ GpuSamplerTensorStorage storage { GpuSamplerTensorStorage::Unknown };
+ TensorSamplerAddressModeX address_mode_x { TensorSamplerAddressModeX::Unknown };
+ TensorSamplerAddressModeY address_mode_y { TensorSamplerAddressModeY::Unknown };
+ TensorSamplerAddressModeZ address_mode_z { TensorSamplerAddressModeZ::Unknown };
+};
+
+inline GpuSampler create_simple_sampler(const TensorInfo* tensor_info_id, GpuSampler sampler, int32_t step_x, int32_t step_y, int32_t step_z)
+{
+ auto tensor = tensor_info_id->shape;
+
+ GpuSampler dst_sampler;
+ dst_sampler.format = sampler.format;
+ dst_sampler.storage = GpuSamplerTensorStorage::BufferUint8Ptr;
+ dst_sampler.address_mode_x = sampler.address_mode_x;
+ dst_sampler.address_mode_y = sampler.address_mode_y;
+ dst_sampler.address_mode_z = sampler.address_mode_z;
+
+ int32_t dim_x = 0;
+ int32_t dim_y = 0;
+ int32_t dim_z = 0;
+
+ switch(sampler.format)
+ {
+ case TensorSamplerFormat::C_W_H:
+ dim_x = tensor[0];
+ dim_y = tensor[1];
+ dim_z = tensor[2];
+ break;
+ case TensorSamplerFormat::C_WH_1:
+ dim_x = tensor[0];
+ dim_y = tensor[1] * tensor[2];
+ dim_z = 1;
+ break;
+ default:
+ std::cout << "Unsupported tensor format" << std::endl;
+ assert(false);
+ break;
+ }
+
+ if(dim_x == 1)
+ {
+ assert(step_x == 1);
+ dst_sampler.address_mode_x = TensorSamplerAddressModeX::None;
+ }
+
+ if(dim_y == 1)
+ {
+ assert(step_y == 1);
+ dst_sampler.address_mode_y = TensorSamplerAddressModeY::None;
+ }
+
+ if(dim_z == 1)
+ {
+ assert(step_z == 1);
+ dst_sampler.address_mode_z = TensorSamplerAddressModeZ::None;
+ }
+
+ return dst_sampler;
+}
+
+class GpuOutputSampler
+{
+public:
+ GpuOutputSampler() = default;
+ /**
+ * @brief Method used to initialize the GpuOutputSampler. The GpuOutputSampler can be initialized only once
+ * by the root component. Once initialized, all simpler components will need to used this sampler
+ * or a broadcasted version of it
+ *
+ * @param[in] sampler GpuSampler
+ * @param[in] step_x Increment step in the X direction. Not necessarily it is the same of n0 of tile!
+ * @param[in] step_y Increment step in the Y direction. Not necessarily it is the same of m0 of tile!
+ * @param[in] step_z Increment step in the Z direction. Not necessarily it is the same of d0 of tile!
+ */
+ void initialize(const TensorInfo *tensor_info_id, GpuSamplerTensorStorage tensor_storage, TensorSamplerFormat tensor_format, int32_t step_x, int32_t step_y, int32_t step_z)
+ {
+ assert(_is_initialized == false);
+
+ _step_x = step_x;
+ _step_y = step_y;
+ _step_z = step_z;
+ _tensor_info_id = tensor_info_id;
+ _sampler = create_sampler(tensor_storage, tensor_format);
+ _is_initialized = true;
+ };
+
+ GpuSampler sampler() const
+ {
+ return _sampler;
+ };
+
+ int32_t step_x() const
+ {
+ return _step_x;
+ };
+
+ int32_t step_y() const
+ {
+ return _step_y;
+ };
+
+ int32_t step_z() const
+ {
+ return _step_z;
+ };
+private:
+ GpuSampler create_sampler(GpuSamplerTensorStorage tensor_storage, TensorSamplerFormat tensor_format)
+ {
+ // Output can only be in output mode
+ assert(tensor_storage != GpuSamplerTensorStorage::Image2dReadOnly);
+ assert(tensor_storage != GpuSamplerTensorStorage::Image3dReadOnly);
+
+ auto tensor = _tensor_info_id->shape;
+
+ GpuSampler sampler;
+ sampler.format = tensor_format;
+ sampler.storage = tensor_storage;
+ sampler.address_mode_x = TensorSamplerAddressModeX::None;
+ sampler.address_mode_y = TensorSamplerAddressModeY::None;
+ sampler.address_mode_z = TensorSamplerAddressModeZ::None;
+
+ // In the case of texture, we do not need any special checks at the border
+ if(tensor_storage == GpuSamplerTensorStorage::BufferUint8Ptr)
+ {
+ int32_t dim_x = 0;
+ int32_t dim_y = 0;
+ int32_t dim_z = 0;
+
+ switch(tensor_format)
+ {
+ case TensorSamplerFormat::C_W_H:
+ dim_x = tensor[0];
+ dim_y = tensor[1];
+ dim_z = tensor[2];
+ break;
+ case TensorSamplerFormat::C_WH_1:
+ dim_x = tensor[0];
+ dim_y = tensor[1] * tensor[2];
+ dim_z = 1;
+ break;
+ default:
+ std::cout << "Unsupported tensor format" << std::endl;
+ assert(false);
+ break;
+ }
+
+ if((dim_x % _step_x) != 0 && dim_x != 1)
+ {
+ sampler.address_mode_x = TensorSamplerAddressModeX::OverlappingMin;
+ }
+
+ if((dim_y % _step_y) != 0 && dim_y != 1)
+ {
+ sampler.address_mode_y = TensorSamplerAddressModeY::ClampToMaxEdgeOnly;
+ }
+
+ if((dim_z % _step_z) != 0 && dim_z != 1)
+ {
+ sampler.address_mode_z = TensorSamplerAddressModeZ::ClampToMaxEdgeOnly;
+ }
+ }
+
+ return sampler;
+ }
+ GpuSampler _sampler { }; // GpuSampler
+ int32_t _step_x { 1 };
+ int32_t _step_y { 1 };
+ int32_t _step_z { 1 };
+ const TensorInfo* _tensor_info_id { nullptr };
+ bool _is_initialized { false };
+};
+
+/**
+ * @brief Tensor operand class. This object is used to pass the operands as tensor to the operations performed by the writer.
+ */
+class TensorOperand
+{
+public:
+ TensorOperand(const std::string &val, GpuSampler sampler) : _str(val), _sampler(sampler)
+ {
+ }
+
+ TensorOperand& operator=(const TensorOperand& t)
+ {
+ _str = t.value();
+ _sampler = t.sampler();
+ return *this;
+ }
+
+ std::string value() const
+ {
+ return _str;
+ }
+
+ GpuSampler sampler() const
+ {
+ return _sampler;
+ }
+
+private:
+ std::string _str {};
+ GpuSampler _sampler {};
+};
+
+/**
+ * @brief Data structure that contains all the necessary information to write the Gpu kernel with the Gpu kernel Writer
+ * This data structure must be initialized before being passed to the Gpu Kernel Writer
+ *
+ */
+class GpuKernelWriterDataHolder
+{
+public:
+ /**
+ * @brief Construct a new Gpu Kernel Data object. In this phase, we should also store
+ * the GPU target and target specific capabilities (extensions). For now, we just initialize the
+ * programming language
+ *
+ * @param[in] language Gpu programming language to use
+ */
+ GpuKernelWriterDataHolder(GpuTargetLanguage language) : tiles(language), arguments(language), code(""), _language(language)
+ {
+ }
+ /**
+ * @brief Get the Gpu programming language used
+ *
+ * @return GpuTargetLanguage the Gpu programming language
+ */
+ GpuTargetLanguage programming_language() const
+ {
+ return _language;
+ }
+ /**
+ * @brief @ref GpuTileRegistry
+ *
+ */
+ GpuTileRegistry tiles{};
+ /**
+ * @brief @ref GpuTensorArgumentRegistry
+ *
+ */
+ GpuTensorArgumentRegistry arguments{};
+ /**
+ * @brief @ref GpuOutputSampler.
+ *
+ */
+ GpuOutputSampler output_sampler{};
+ /**
+ * @brief Source code
+ *
+ */
+ std::string code{};
+
+ // GpuExtensionRegistry extensions{};
+private:
+ GpuTargetLanguage _language;
+};
+
+struct LWS
+{
+ int32_t x {1};
+ int32_t y {1};
+ int32_t z {1};
+};
+
+/**
+ * @brief Utility class used to get the tile from the operand. If the operand is not a tile, @ref OperandUnpacker
+ * declare an anonymous tile in the tile registry.
+ */
+class OperandUnpacker
+{
+public:
+ OperandUnpacker(GpuTileRegistry& tiles, GpuTensorArgumentRegistry& arguments) : _tiles(tiles), _arguments(arguments)
+ {
+ // Increase the level of the stack to allocate possible temporary tiles
+ _tiles.increment_registry_level();
+ };
+
+ ~OperandUnpacker()
+ {
+ // Decrease the level of the stack to deallocate any temporary tiles
+ _tiles.decrement_registry_level();
+ }
+
+ IVectorTile* unpack(const Operand& src)
+ {
+ // Get the tile
+ if(src.type() == OperandType::Tile)
+ {
+ assert(_tiles.has_tile(src.value()));
+ return _tiles[src.value()];
+ }
+ // Create an anonymous tile with a constant
+ else if(static_cast<int32_t>(src.type()) & 0x00001000)
+ {
+ if(src.type() == OperandType::ScalarTile)
+ {
+ ScalarTileCoord coord = src.scalar_tile_coordinate();
+ assert(_tiles.has_tile(src.value()));
+ assert(coord.x >= 0);
+ assert(coord.y >= 0);
+ auto val = _tiles[src.value()]->scalar(coord.x, coord.y);
+ return _tiles.insert({{{val.str}}}, val.type.dt);
+ }
+ else
+ {
+ return _tiles.insert({{{src.value()}}}, to_tile_data_type(src.type()));
+ }
+ }
+ // Create an anonymous tile with the tensor component
+ else
+ {
+ assert(_arguments.has_tensor_argument(src.value()));
+ auto x = _arguments[src.value()];
+ const std::string val = x->component(to_tensor_component(src.type()));
+ const DataType dt = x->component_data_type();
+ return _tiles.insert({{{val}}}, dt);
+ }
+ }
+
+private:
+ DataType to_tile_data_type(OperandType x)
+ {
+ return static_cast<DataType>(static_cast<int32_t>(x) & 0x00ff);
+ }
+
+ TensorComponent to_tensor_component(OperandType x)
+ {
+ switch(x)
+ {
+ case OperandType::TensorDim0:
+ return TensorComponent::Dim0;
+ case OperandType::TensorDim1:
+ return TensorComponent::Dim1;
+ case OperandType::TensorDim2:
+ return TensorComponent::Dim2;
+ case OperandType::TensorDim3:
+ return TensorComponent::Dim3;
+ case OperandType::TensorDim4:
+ return TensorComponent::Dim4;
+ case OperandType::TensorStride1:
+ return TensorComponent::Stride1;
+ case OperandType::TensorStride2:
+ return TensorComponent::Stride2;
+ case OperandType::TensorStride3:
+ return TensorComponent::Stride3;
+ case OperandType::TensorStride4:
+ return TensorComponent::Stride4;
+ case OperandType::TensorDim1xDim2:
+ return TensorComponent::Dim1xDim2;
+ case OperandType::TensorDim1xDim2xDim3:
+ return TensorComponent::Dim1xDim2xDim3;
+ case OperandType::TensorDataOffset:
+ return TensorComponent::OffsetFirstElement;
+ default:
+ assert(false);
+ }
+ }
+
+ GpuTileRegistry& _tiles;
+ GpuTensorArgumentRegistry& _arguments;
+};
+
+/**
+ * @brief Utility class used to get the tensor argument from the operand. If the operand is not a tile, @ref OperandUnpacker
+ * declare an anonymous tile in the tile registry.
+ * Tensor dimension reduction aims for reducing the tensor data dimension while keeping data's tensor structure.
+ */
+class TensorOperandUnpacker
+{
+public:
+ TensorOperandUnpacker(GpuTensorArgumentRegistry& arguments) : _arguments(arguments)
+ {
+ };
+
+ IGpuTensorArgument* unpack(const TensorOperand& src)
+ {
+ assert(_arguments.has_tensor_argument(src.value()));
+ return _arguments[src.value()];
+ }
+
+private:
+ GpuTensorArgumentRegistry& _arguments;
+};
+
+/**
+ * @brief The GpuKernel will be used in three occasions (stages):
+ * #- Compilation stage
+ * #- Tuning stage
+ * #- Dispatch stage
+ */
+struct GpuKernel
+{
+ // Compilation stage
+ std::string code {}; // Source code, required for the compilation stage
+ std::vector<GpuExtensions> list_extensions{}; // Extensions, required for the compilation stage
+ // Tuning stage
+ std::string config_id {}; // Unique id, required for the tuning stage
+ std::vector<LWS> list_lws{}; // LWS to test, required for the tuning stage
+ // Dispatch stage
+ GpuOutputSampler output_sampler{}; // GpuOutputSampler, required for the dispatch stage
+ std::vector<std::pair<int32_t, GpuTensorStorage>> list_tensor_storages; // List of tensor storages, required for the dispatch stage
+ std::vector<std::pair<int32_t, TensorComponent>> list_tensor_components;// List of tensor components (width, stride,..), required for the dispatch stage)
+};
+
+// This function should produce an object with the source
+inline std::string generate_code(GpuKernelWriterDataHolder &in, const std::string& name)
+{
+ std::string code;
+ code += "__kernel void ";
+ code += name;
+ code += "(\n";
+
+ auto IdSpaces = in.arguments.IdSpace_declarations();
+
+ std::vector<std::string> arg_str;
+
+ auto tensor_args = in.arguments.tensor_argument_declarations();
+
+ for(auto &i : tensor_args)
+ {
+ // For each tensor used, get the storage and tensor components
+ auto storages = i->storage_declarations();
+ auto components = i->component_declarations();
+
+ for(auto &y : storages)
+ {
+ std::string str;
+ str += i->storage_type_declaration(y);
+ str += " ";
+ str += i->storage(y);
+ arg_str.push_back(str);
+ }
+
+ for(auto &y : components)
+ {
+ std::string str;
+ str += i->component_type_declaration();
+ str += " ";
+ str += i->component(y);
+ arg_str.push_back(str);
+ }
+ }
+
+ for(size_t i = 0; i < arg_str.size(); ++i)
+ {
+ code += arg_str[i];
+ if(i + 1 < arg_str.size())
+ {
+ code += ",\n";
+ }
+ }
+
+ code += ")\n";
+ code += "{\n";
+ code += in.code;
+ code += "}\n";
+
+ return code;
+};
+
+/**
+ * @brief This class is responsible to map a N-Tensor to a 3d tensor. The mapper needs the GpuSampler to know
+ * how to reduce the dimensionality of a tensor
+ *
+ */
+class GpuTensor3dMapper
+{
+public:
+ GpuTensor3dMapper(IGpuTensorArgument* tensor, GpuSampler sampler) : _sampler(sampler), _tensor(tensor)
+ {
+ };
+
+ std::string tensor_component_x() const
+ {
+ const auto format = _sampler.format;
+ switch(format)
+ {
+ case TensorSamplerFormat::C_WH_1:
+ case TensorSamplerFormat::C_W_H:
+ return _tensor->component(TensorComponent::C);
+ default:
+ std::cout << "Unsupported tensor format" << std::endl;
+ assert(false);
+ }
+ }
+
+ std::string tensor_component_y() const
+ {
+ const auto format = _sampler.format;
+ switch(format)
+ {
+ case TensorSamplerFormat::C_WH_1:
+ return _tensor->component(TensorComponent::WxH);
+ case TensorSamplerFormat::C_W_H:
+ return _tensor->component(TensorComponent::W);
+ default:
+ std::cout << "Unsupported tensor format" << std::endl;
+ assert(false);
+ }
+ }
+
+ std::string tensor_component_z() const
+ {
+ const auto format = _sampler.format;
+ switch(format)
+ {
+ case TensorSamplerFormat::C_WH_1:
+ return "1";
+ case TensorSamplerFormat::C_W_H:
+ return _tensor->component(TensorComponent::H);
+ default:
+ std::cout << "Unsupported tensor format" << std::endl;
+ assert(false);
+ }
+ }
+
+ std::string tensor_component_stride_y() const
+ {
+ const auto format = _sampler.format;
+ switch(format)
+ {
+ case TensorSamplerFormat::C_WH_1:
+ case TensorSamplerFormat::C_W_H:
+ return _tensor->component(TensorComponent::Stride1);
+ default:
+ std::cout << "Unsupported tensor format" << std::endl;
+ assert(false);
+ }
+ }
+
+ std::string tensor_component_stride_z() const
+ {
+ const auto format = _sampler.format;
+ switch(format)
+ {
+ case TensorSamplerFormat::C_WH_1:
+ return "0";
+ case TensorSamplerFormat::C_W_H:
+ return _tensor->component(TensorComponent::Stride2);
+ default:
+ std::cout << "Unsupported tensor format" << std::endl;
+ assert(false);
+ }
+ }
+
+ std::string tensor_component_stride_batch() const
+ {
+ const auto format = _sampler.format;
+ switch(format)
+ {
+ case TensorSamplerFormat::C_WH_1:
+ case TensorSamplerFormat::C_W_H:
+ return _tensor->component(TensorComponent::Stride3);
+ default:
+ std::cout << "Unsupported tensor format" << std::endl;
+ assert(false);
+ }
+ }
+
+ bool is_one_component_x() const
+ {
+ auto t = _tensor->format();
+ const auto format = _sampler.format;
+ switch(format)
+ {
+ case TensorSamplerFormat::C_WH_1:
+ case TensorSamplerFormat::C_W_H:
+ return t.shape[0] == 1;
+ default:
+ std::cout << "Unsupported tensor format" << std::endl;
+ assert(false);
+ }
+ }
+
+ bool is_one_component_y() const
+ {
+ auto t = _tensor->format();
+ const auto format = _sampler.format;
+ switch(format)
+ {
+ case TensorSamplerFormat::C_WH_1:
+ return (t.shape[1] * t.shape[2]) == 1;
+ case TensorSamplerFormat::C_W_H:
+ return t.shape[1] == 1;
+ default:
+ std::cout << "Unsupported tensor format" << std::endl;
+ assert(false);
+ }
+ }
+
+ bool is_one_component_z() const
+ {
+ auto t = _tensor->format();
+ const auto format = _sampler.format;
+ switch(format)
+ {
+ case TensorSamplerFormat::C_WH_1:
+ return true;
+ case TensorSamplerFormat::C_W_H:
+ return t.shape[2] == 1;
+ default:
+ std::cout << "Unsupported tensor format" << std::endl;
+ assert(false);
+ }
+ }
+
+ bool is_one_component_batch() const
+ {
+ auto t = _tensor->format();
+ const auto format = _sampler.format;
+ switch(format)
+ {
+ case TensorSamplerFormat::C_WH_1:
+ case TensorSamplerFormat::C_W_H:
+ return t.shape[3] == 1;
+ default:
+ std::cout << "Unsupported tensor format" << std::endl;
+ assert(false);
+ }
+ }
+
+ GpuSampler gpu_sampler() const
+ {
+ return _sampler;
+ }
+
+ IGpuTensorArgument* tensor_argument() const
+ {
+ return _tensor;
+ }
+
+private:
+ GpuSampler _sampler;
+ IGpuTensorArgument* _tensor;
+};
+
+struct GpuKernelWriterAttribute
+{
+ bool return_tensor_component_by_value { false };
+};
+
+enum class ConvertPolicy
+{
+ Wrap, /**< Wrap around */
+ Saturate /**< Saturate */
+};
+
+enum class RoundingMode
+{
+ None,
+ Rte,
+ Rtz,
+ Rtp,
+ Rtn
+};
+
+// https://llvm.org/docs/tutorial/MyFirstLanguageFrontend/LangImpl05.html
+class IGpuKernelWriter
+{
+public:
+ virtual ~IGpuKernelWriter() = default;
+ virtual void set_IdSpace(int32_t id) = 0;
+ virtual void import_tile(const std::string& dst, const IVectorTile *src) = 0;
+ virtual void declare_argument(const std::string& name, const TensorInfo& tensor) = 0;
+ virtual void declare_tile(const std::string& name, const TileInfo& info) = 0;
+ virtual void declare_const_tile(const std::string& name, const std::vector<std::vector<std::string>>& in, DataType dt) = 0;
+ virtual void write_text(const std::string& x) = 0;
+ virtual void compound_statement_begin() = 0;
+ virtual void compound_statement_end() = 0;
+
+ // Operations
+ virtual void op_get_global_id(const Operand& dst_var, int32_t dim) = 0;
+ virtual void op_get_global_coord(const Operand& dst, const Operand& step, const TensorOperand& tensor, int32_t dim) = 0;
+ virtual void op_get_global_batch(const Operand& dst, const TensorOperand& tensor) = 0;
+ virtual void op_get_global_size(const Operand& dst_var, int32_t dim) = 0;
+ virtual void op_binary_expression(const Operand& dst, const Operand &lhs, BinaryOp op, const Operand &rhs) = 0;
+ virtual void op_assign(const Operand& dst_name, const Operand& src_name) = 0;
+ virtual void op_scalar_function(const Operand& dst_name, const Operand& src_name, ScalarUnaryFunction func) = 0;
+ virtual void op_if(const Operand& lhs, BinaryOp op, const Operand& rhs) = 0;
+ virtual void op_for_loop(const Operand& var_name, BinaryOp cond_op, const Operand& cond_value, AssignmentOp update_op, const Operand& update_value) = 0;
+ virtual void op_load_indirect(const TensorOperand& tensor, const Operand& dst, const Operand& x, const Operand& y_indirect, const Operand& z, const Operand& b = Operand("0", OperandType::ScalarInt32)) = 0;
+ virtual void op_load_immediate(const TensorOperand& tensor, const Operand& dst, const Operand& x, const Operand& y, const Operand& z, const Operand& b = Operand("0", OperandType::ScalarInt32), const Operand& dilation_y = Operand("1", OperandType::ScalarInt32)) = 0;
+ virtual void op_store_immediate(const TensorOperand& tensor, const Operand& src, const Operand& x, const Operand& y, const Operand& z, const Operand& b = Operand("0", OperandType::ScalarInt32)) = 0;
+ virtual void op_cast_expression(const Operand& dst, const Operand &src, ConvertPolicy policy) = 0;
+ virtual void op_return() = 0;
+ // virtual void op_else() = 0;
+ // virtual void op_elseif() = 0;
+ // Utils
+ // It is the process of converting
+ virtual void util_get_indirect_buffer(const Operand& dst, const TensorOperand& tensor, const Operand& x, const Operand& y, const Operand& x_off, const Operand& y_off) = 0;
+};
+
+enum class GpuLoadStoreType
+{
+ Load = 1,
+ Store = 2
+};
+
+class IGpuLoadStoreHelperWriter
+{
+public:
+ IGpuLoadStoreHelperWriter(IGpuKernelWriter *x, GpuTensor3dMapper mapper, GpuLoadStoreType type) : _writer(x), _mapper(mapper), _type(type) {}
+ IGpuLoadStoreHelperWriter(const IGpuLoadStoreHelperWriter &) = default;
+ IGpuLoadStoreHelperWriter &operator=(const IGpuLoadStoreHelperWriter &) = default;
+ virtual ~IGpuLoadStoreHelperWriter() = default;
+ virtual void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) = 0;
+ virtual void write(const std::pair<int32_t, std::string>& y) = 0;
+ virtual void finalize() = 0;
+protected:
+ IGpuKernelWriter* _writer;
+ GpuTensor3dMapper _mapper;
+ GpuLoadStoreType _type;
+};
+
+class ClLoadStoreBufferHelperWriter : public IGpuLoadStoreHelperWriter
+{
+public:
+ ClLoadStoreBufferHelperWriter(IGpuKernelWriter *x, const GpuTensor3dMapper& mapper, GpuLoadStoreType type) : IGpuLoadStoreHelperWriter(x, mapper, type)
+ {
+ }
+
+ ClLoadStoreBufferHelperWriter(const ClLoadStoreBufferHelperWriter &) = default;
+ ClLoadStoreBufferHelperWriter &operator=(const ClLoadStoreBufferHelperWriter &) = default;
+
+ static bool validate(IGpuKernelWriter *x, GpuTensor3dMapper mapper, GpuLoadStoreType type, IVectorTile *dst)
+ {
+ CKW_UNUSED(x, type, dst);
+
+ if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::BufferUint8Ptr)
+ {
+ return false;
+ }
+ return true;
+ }
+
+ void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) override
+ {
+ assert(validate(_writer, _mapper, _type, dst));
+
+ _dst = dst;
+ _ls_width_full = dst->format().w;
+
+ _coord_x = x->scalar(0, 0).str;
+ _coord_z = z->scalar(0, 0).str;
+ _coord_b = b->scalar(0, 0).str;
+ _coord_orig_z = _coord_z;
+
+ out_of_bound_initialize_x(_coord_x);
+ out_of_bound_initialize_z(_coord_z);
+
+ /*
+ meaning of else:
+ - x: partial load/store
+ - y: no load/store operation
+ - z: no load/store operation
+ if(x)
+ {
+ if(z)
+ {
+ if(y)
+ {
+ // full load/store width
+ }
+ else
+ {
+ // no load/store
+ }
+ }
+ else
+ {
+ // no load/store
+ }
+ }
+ else
+ {
+ if(z)
+ {
+ if(y)
+ {
+ // partial load/store width
+ }
+ else
+ {
+ // no load/store
+ }
+ }
+ else
+ {
+ // no load/store
+ }
+ }
+ */
+ }
+
+ void write(const std::pair<int32_t, std::string>& y) override
+ {
+ int32_t idx_y = y.first;
+ std::string coord_y = y.second;
+
+ // The only check required is on Y.
+ out_of_bound_initialize_y(coord_y);
+
+ const std::string dst = _dst->vector(idx_y).str;
+ const std::string address = to_ls_buffer_address(_coord_x, coord_y, _coord_z, _coord_b);
+ const std::string ls_buf = to_ls_buffer(_type, _ls_width_full, dst, address);
+
+ _writer->write_text(ls_buf);
+ _writer->write_text(";\n");
+
+ out_of_bound_finalize_y(dst);
+
+ // The left over load/store will be written in the finalize stage
+ if(_ls_width_part.size() != 0)
+ {
+ int32_t w = 0;
+ for(auto &p : _ls_width_part)
+ {
+ const std::string dst0 = _dst->vector(w, p, idx_y).str;
+ const std::string coord_x = _coord_x + " + " + std::to_string(w);
+ const std::string address = to_ls_buffer_address(coord_x, coord_y, _coord_z, _coord_b);
+ const std::string ls_buf0 = to_ls_buffer(_type, p, dst0, address);
+ _leftovers_x.push_back(std::make_pair(std::make_pair(dst0, coord_y), ls_buf0));
+
+ w += p;
+ }
+ }
+ }
+
+ void finalize() override
+ {
+ out_of_bound_finalize_z();
+ out_of_bound_finalize_x();
+ }
+private:
+ IVectorTile* _dst { nullptr };
+ int32_t _ls_width_full { 0 };
+ std::vector<int32_t> _ls_width_part { };
+ std::vector<std::pair<std::pair<std::string, std::string>, std::string>> _leftovers_x {};
+ std::string _coord_x {};
+ std::string _coord_z {};
+ std::string _coord_orig_z {};
+ std::string _coord_b {};
+
+ void out_of_bound_initialize_x(std::string& coord)
+ {
+ if(_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin)
+ {
+ auto tensor_format = _mapper.tensor_argument()->format();
+ auto shape = tensor_format.shape;
+
+ _ls_width_part = decompose_leftover_ls_vector_width(shape[0] % _ls_width_full);
+ if(_ls_width_part.size() != 0)
+ {
+ _writer->write_text("if(" + coord + " > 0)\n");
+ _writer->compound_statement_begin();
+ }
+ }
+ };
+
+ void out_of_bound_finalize_x()
+ {
+ if(_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin)
+ {
+ if(_ls_width_part.size() != 0)
+ {
+ _writer->compound_statement_end();
+ _writer->write_text("else\n");
+ _writer->compound_statement_begin();
+
+ out_of_bound_initialize_z(_coord_orig_z);
+ for(auto &i : _leftovers_x)
+ {
+ out_of_bound_initialize_y(i.first.second);
+ _writer->write_text(i.second);
+ _writer->write_text(";\n");
+ out_of_bound_finalize_y(i.first.first);
+ }
+ out_of_bound_finalize_z();
+ _writer->compound_statement_end();
+ }
+ }
+ };
+
+ void out_of_bound_initialize_y(std::string& coord)
+ {
+ std::string max = "";
+
+ const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
+
+ switch(address_mode_y)
+ {
+ case TensorSamplerAddressModeY::Skip:
+ case TensorSamplerAddressModeY::ClampToBorder:
+ // NOTE: This line should not be moved outside of the switch statement.
+ // The reason for that is because when we query the component, the component is marked as used
+ // and added to the list of arguments of the kernel. Since, not in all cases this component is required,
+ // we should request the component only when used
+ max = _mapper.tensor_component_y();
+ _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n");
+ _writer->compound_statement_begin();
+ break;
+ case TensorSamplerAddressModeY::SkipMinEdgeOnly:
+ case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
+ _writer->write_text("if(" + coord + " >= 0)\n");
+ _writer->compound_statement_begin();
+ break;
+ case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
+ case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
+ max = _mapper.tensor_component_y();
+ _writer->write_text("if(" + coord + " < " + max + ")\n");
+ _writer->compound_statement_begin();
+ break;
+ case TensorSamplerAddressModeY::ClampToNearest:
+ max = _mapper.tensor_component_y();
+ coord = "clamp(" + coord + ", 0, " + max + " - 1)";
+ break;
+ case TensorSamplerAddressModeY::ClampToMaxEdgeOnly:
+ max = _mapper.tensor_component_y();
+ coord = "min(" + coord + ", " + max + " - 1)";
+ break;
+ case TensorSamplerAddressModeY::ClampToMinEdgeOnly:
+ coord = "max(" + coord + ", 0)";
+ break;
+ case TensorSamplerAddressModeY::None:
+ break;
+ default:
+ std::cout << "Unsupported address mode for write_out_of_bound_check_yz" << std::endl;
+ assert(false);
+ }
+ };
+
+ void out_of_bound_finalize_y(const std::string& dst)
+ {
+ const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
+
+ switch(address_mode_y)
+ {
+ case TensorSamplerAddressModeY::ClampToBorder:
+ case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
+ case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
+ case TensorSamplerAddressModeY::Skip:
+ case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
+ case TensorSamplerAddressModeY::SkipMinEdgeOnly:
+ _writer->compound_statement_end();
+ break;
+
+ default:
+ assert(false);
+ }
+
+ switch(address_mode_y)
+ {
+ case TensorSamplerAddressModeY::ClampToBorder:
+ case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
+ case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
+ _writer->write_text("else\n");
+ _writer->compound_statement_begin();
+ _writer->write_text(dst);
+ _writer->write_text(" = 0.0f;\n");
+ _writer->compound_statement_end();
+ break;
+
+ default:
+ assert(false);
+ }
+ };
+
+ void out_of_bound_initialize_z(std::string& coord)
+ {
+ std::string max = "";
+
+ const auto address_mode_z = _mapper.gpu_sampler().address_mode_z;
+
+ switch(address_mode_z)
+ {
+ case TensorSamplerAddressModeZ::Skip:
+ max = _mapper.tensor_component_z();
+ _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n");
+ _writer->compound_statement_begin();
+ break;
+ case TensorSamplerAddressModeZ::SkipMinEdgeOnly:
+ _writer->write_text("if(" + coord + " >= 0)\n");
+ _writer->compound_statement_begin();
+ break;
+ case TensorSamplerAddressModeZ::SkipMaxEdgeOnly:
+ max = _mapper.tensor_component_z();
+ _writer->write_text("if(" + coord + " < " + max + ")\n");
+ _writer->compound_statement_begin();
+ break;
+ case TensorSamplerAddressModeZ::ClampToNearest:
+ max = _mapper.tensor_component_z();
+ coord = "clamp(" + coord + ", 0, " + max + " - 1)";
+ break;
+ case TensorSamplerAddressModeZ::ClampToMaxEdgeOnly:
+ max = _mapper.tensor_component_z();
+ coord = "min(" + coord + ", " + max + " - 1)";
+ break;
+ case TensorSamplerAddressModeZ::ClampToMinEdgeOnly:
+ coord = "max(" + coord + ", 0)";
+ break;
+ case TensorSamplerAddressModeZ::None:
+ break;
+ default:
+ std::cout << "Unsupported address mode for write_out_of_bound_check_yz" << std::endl;
+ assert(false);
+ }
+ };
+
+ void out_of_bound_finalize_z()
+ {
+ const auto address_mode_z = _mapper.gpu_sampler().address_mode_z;
+
+ switch(address_mode_z)
+ {
+ case TensorSamplerAddressModeZ::Skip:
+ case TensorSamplerAddressModeZ::SkipMinEdgeOnly:
+ case TensorSamplerAddressModeZ::SkipMaxEdgeOnly:
+ _writer->compound_statement_end();
+ break;
+
+ default:
+ assert(false);
+ }
+ };
+
+ std::vector<int32_t> decompose_leftover_ls_vector_width(int32_t ls_leftover_vector_width) const
+ {
+ std::vector<int32_t> x;
+
+ switch(ls_leftover_vector_width)
+ {
+ case 0:
+ break;
+ case 1:
+ case 2:
+ case 3:
+ case 4:
+ case 8:
+ case 16:
+ x.push_back(ls_leftover_vector_width);
+ break;
+ case 5:
+ x.push_back(4);
+ x.push_back(1);
+ break;
+ case 6:
+ x.push_back(4);
+ x.push_back(2);
+ break;
+ case 7:
+ x.push_back(4);
+ x.push_back(3);
+ break;
+ case 9:
+ x.push_back(8);
+ x.push_back(1);
+ break;
+ case 10:
+ x.push_back(8);
+ x.push_back(2);
+ break;
+ case 11:
+ x.push_back(8);
+ x.push_back(3);
+ break;
+ case 12:
+ x.push_back(8);
+ x.push_back(4);
+ break;
+ case 13:
+ x.push_back(8);
+ x.push_back(4);
+ x.push_back(1);
+ break;
+ case 14:
+ x.push_back(8);
+ x.push_back(4);
+ x.push_back(2);
+ break;
+ case 15:
+ x.push_back(8);
+ x.push_back(4);
+ x.push_back(3);
+ break;
+
+ default:
+ assert(false);
+ }
+ return x;
+ }
+
+ std::string to_ls_buffer(GpuLoadStoreType type, int32_t vector_width, const std::string& data, const std::string& address)
+ {
+ switch(type)
+ {
+ case GpuLoadStoreType::Load:
+ if(vector_width != 1)
+ {
+ return data + " = vload" + std::to_string(vector_width) + "(0, " + address + ")";
+ }
+ else
+ {
+ return data + " = *(" + address + ")";
+ }
+ break;
+ case GpuLoadStoreType::Store:
+ if(vector_width != 1)
+ {
+ return "vstore" + std::to_string(vector_width) + "(" + data + ", 0, " + address + ")";
+ }
+ else
+ {
+ return "*(" + address + ") = " + data;
+ }
+ break;
+ default:
+ std::cout << "Unsupported GpuLoadStoreType" << std::endl;
+ assert(false);
+ }
+ }
+
+ std::string to_ls_buffer_address(const std::string& x, const std::string& y, const std::string& z, const std::string& b) const
+ {
+ auto tensor_storage = static_cast<GpuTensorStorage>(_mapper.gpu_sampler().storage);
+ assert(tensor_storage == GpuTensorStorage::BufferUint8Ptr);
+ const std::string ptr_buf = _mapper.tensor_argument()->storage(tensor_storage);
+ const std::string dst_type = get_cl_data_type(_dst->format().dt, 1);
+
+ std::string address;
+ address += "(__global ";
+ address += dst_type;
+ address += "*)(";
+ address += ptr_buf;
+ if(x != "0" && (_mapper.is_one_component_x() != true))
+ {
+ address += " + (";
+ address += x + ") * sizeof(" + dst_type + ")";
+ }
+ if(y != "0" && (_mapper.is_one_component_y() != true))
+ {
+ const std::string stride_y = _mapper.tensor_component_stride_y();
+ address += " + (";
+ address += y + ")";
+ address += " * ";
+ address += stride_y;
+ }
+ if(z != "0" && (_mapper.is_one_component_z() != true))
+ {
+ const std::string stride_z = _mapper.tensor_component_stride_z();
+ address += " + (";
+ address += z + ")";
+ address += " * ";
+ address += stride_z;
+ }
+ if(b != "0" && (_mapper.is_one_component_batch() != true))
+ {
+ const std::string stride_b = _mapper.tensor_component_stride_batch();
+ address += " + (";
+ address += b + ")";
+ address += " * ";
+ address += stride_b;
+ }
+ address += ")";
+ return address;
+ }
+};
+
+class ClLoadStoreImage2dHelperWriter : public IGpuLoadStoreHelperWriter
+{
+public:
+ static bool validate(IGpuKernelWriter *x, const GpuTensor3dMapper& mapper, GpuLoadStoreType type, IVectorTile *dst)
+ {
+ CKW_UNUSED(x);
+
+ if(dst->format().w != 4)
+ {
+ return false;
+ }
+ if(mapper.gpu_sampler().address_mode_x != TensorSamplerAddressModeX::None)
+ {
+ return false;
+ }
+ if(mapper.gpu_sampler().address_mode_z != TensorSamplerAddressModeZ::None)
+ {
+ return false;
+ }
+ if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dReadOnly && type == GpuLoadStoreType::Load)
+ {
+ return false;
+ }
+ if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dWriteOnly && type == GpuLoadStoreType::Store)
+ {
+ return false;
+ }
+ if((dst->format().dt != DataType::Fp32) && (dst->format().dt != DataType::Fp16))
+ {
+ return false;
+ }
+ return true;
+ /*
+ - x: Only GpuSamplerAddressModeX::None is supported and vector length = 4
+ - z: Only GpuSamplerAddressModeZ::None is supported
+ */
+ }
+ ClLoadStoreImage2dHelperWriter(IGpuKernelWriter *x, const GpuTensor3dMapper& mapper, GpuLoadStoreType type) : IGpuLoadStoreHelperWriter(x, mapper, type)
+ {
+ }
+
+ ClLoadStoreImage2dHelperWriter(const ClLoadStoreImage2dHelperWriter &) = default;
+ ClLoadStoreImage2dHelperWriter &operator=(const ClLoadStoreImage2dHelperWriter &) = default;
+
+ void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) override
+ {
+ assert(validate(_writer, _mapper, _type, dst));
+
+ _dst = dst;
+ _ls_width_full = dst->format().w;
+ _coord_x = x->scalar(0, 0).str;
+ _coord_z = z->scalar(0, 0).str;
+ _coord_b = b->scalar(0, 0).str;
+
+ /*
+ if(y)
+ {
+ // full load/store width
+ }
+ else
+ {
+ // no load/store
+ }
+ */
+ }
+
+ void write(const std::pair<int32_t, std::string>& y) override
+ {
+ int32_t idx_y = y.first;
+ std::string coord_y = y.second;
+
+ // The only check required is on Y.
+ out_of_bound_initialize_y(coord_y);
+
+ const std::string dst = _dst->vector(idx_y).str;
+ const std::string sampler = to_ls_image2d_sampler();
+ const std::string coord = to_ls_image2d_coord(_coord_x, coord_y, _coord_z, _coord_b);
+ const std::string ls_buf = to_ls_image2d(_type, _ls_width_full, dst, sampler, coord);
+
+ _writer->write_text(ls_buf);
+ _writer->write_text(";\n");
+
+ out_of_bound_finalize_y(dst);
+ }
+
+ void finalize() override
+ {
+ }
+private:
+ IVectorTile* _dst { nullptr };
+ int32_t _ls_width_full { 0 };
+ std::string _coord_x {};
+ std::string _coord_z {};
+ std::string _coord_b {};
+
+ void out_of_bound_initialize_y(std::string& coord)
+ {
+ std::string max = "";
+
+ const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
+
+ switch(address_mode_y)
+ {
+ case TensorSamplerAddressModeY::Skip:
+ max = _mapper.tensor_component_y();
+ _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n");
+ _writer->compound_statement_begin();
+ break;
+ case TensorSamplerAddressModeY::SkipMinEdgeOnly:
+ _writer->write_text("if(" + coord + " >= 0)\n");
+ _writer->compound_statement_begin();
+ break;
+ case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
+ max = _mapper.tensor_component_y();
+ _writer->write_text("if(" + coord + " < " + max + ")\n");
+ _writer->compound_statement_begin();
+ break;
+ case TensorSamplerAddressModeY::ClampToBorder:
+ case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
+ case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
+ case TensorSamplerAddressModeY::ClampToNearest:
+ case TensorSamplerAddressModeY::ClampToMaxEdgeOnly:
+ case TensorSamplerAddressModeY::ClampToMinEdgeOnly:
+ case TensorSamplerAddressModeY::None:
+ break;
+ default:
+ std::cout << "Unsupported address mode for write_out_of_bound_check_y" << std::endl;
+ assert(false);
+ }
+ };
+
+ void out_of_bound_finalize_y(const std::string& dst)
+ {
+ CKW_UNUSED(dst);
+
+ const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
+
+ switch(address_mode_y)
+ {
+ case TensorSamplerAddressModeY::Skip:
+ case TensorSamplerAddressModeY::SkipMinEdgeOnly:
+ case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
+ _writer->compound_statement_end();
+ break;
+
+ default:
+ assert(false);
+ }
+ };
+
+ std::string to_ls_image2d(GpuLoadStoreType type, int32_t vector_width, const std::string& data, const std::string& sampler, const std::string& coord)
+ {
+ CKW_UNUSED(vector_width);
+
+ auto tensor_storage = static_cast<GpuTensorStorage>(_mapper.gpu_sampler().storage);
+ const std::string image2d_obj = _mapper.tensor_argument()->storage(tensor_storage);
+ // const DataType dt = _dst->format().dt;
+ const std::string post_fix = _dst->format().dt == DataType::Fp32? "f" : "h";
+
+ switch(type)
+ {
+ case GpuLoadStoreType::Load:
+ return data + " = read_image" + post_fix + "(" + image2d_obj + ", " + sampler + ", " + coord + ")";
+ break;
+ case GpuLoadStoreType::Store:
+ return "write_image" + post_fix + "(" + image2d_obj + ", " + coord + ", " + data + ")";
+ default:
+ assert(false);
+ std::cout << "Unsupported GpuLoadStoreType" << std::endl;
+ assert(false);
+ }
+ }
+
+ std::string to_ls_image2d_sampler() const
+ {
+ const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
+
+ switch(address_mode_y)
+ {
+ case TensorSamplerAddressModeY::None:
+ return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST";
+ case TensorSamplerAddressModeY::Skip:
+ case TensorSamplerAddressModeY::SkipMinEdgeOnly:
+ case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
+ case TensorSamplerAddressModeY::ClampToBorder:
+ case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
+ case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
+ return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST";
+ case TensorSamplerAddressModeY::ClampToNearest:
+ case TensorSamplerAddressModeY::ClampToMaxEdgeOnly:
+ case TensorSamplerAddressModeY::ClampToMinEdgeOnly:
+ return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST";
+ default:
+ std::cout << "Unsupported address_mode_coord" << std::endl;
+ assert(false);
+ }
+ }
+
+ std::string to_ls_image2d_coord(const std::string& x, const std::string& y, const std::string& z, const std::string& b) const
+ {
+ std::string coord_x = "(" + x + ") >> 2";
+ std::string coord_y = "(";
+
+ if(y != "0" && (_mapper.is_one_component_y() != true))
+ {
+ coord_y += y;
+ }
+ if(z != "0" && (_mapper.is_one_component_z() != true))
+ {
+ const std::string dim = _mapper.tensor_component_y();
+ coord_y += " + (";
+ coord_y += z + ")";
+ coord_y += " * ";
+ coord_y += dim;
+ }
+ if(b != "0" && (_mapper.is_one_component_batch() != true))
+ {
+ const std::string dim0 = _mapper.tensor_component_y();
+ const std::string dim1 = _mapper.tensor_component_z();
+ coord_y += " + (";
+ coord_y += b + ")";
+ coord_y += " * ";
+ coord_y += dim0;
+ coord_y += " * ";
+ coord_y += dim1;
+ }
+ coord_y += ")";
+ return "(int2)(" + coord_x + ", " + coord_y + ")";
+ }
+};
+
+/** IGpuLoadStoreHelperWriter factory class */
+class ClLoadStoreHelperWriterFactory final
+{
+public:
+ /** Static method to call the IGpuLoadStoreHelperWriter class accordingly with the tensor storage set in the mapper
+ *
+ *
+ * @return IGpuLoadStoreHelperWriter
+ */
+ static std::unique_ptr<IGpuLoadStoreHelperWriter> create(IGpuKernelWriter *x, const GpuTensor3dMapper& mapper, GpuLoadStoreType type)
+ {
+ const auto tensor_storage = mapper.gpu_sampler().storage;
+ switch(tensor_storage)
+ {
+ case GpuSamplerTensorStorage::BufferUint8Ptr:
+ return std::make_unique<ClLoadStoreBufferHelperWriter>(x, mapper, type);
+ case GpuSamplerTensorStorage::Image2dReadOnly:
+ case GpuSamplerTensorStorage::Image2dWriteOnly:
+ return std::make_unique<ClLoadStoreImage2dHelperWriter>(x, mapper, type);
+ default:
+ std::cout << "Unsupported Gpu tensor storage" << std::endl;
+ assert(false);
+ }
+ }
+};
+
+// This utility method needs to go in utils.h
+inline bool is_tile_scalar(IVectorTile* x)
+{
+ return x->format().w == 1 && x->format().h == 1;
+}
+
+class ClKernelWriter : public IGpuKernelWriter
+{
+public:
+ ClKernelWriter(GpuKernelWriterAttribute *attr, GpuKernelWriterDataHolder *x)
+ {
+ _data = x;
+ _attr = attr;
+ }
+
+ ClKernelWriter(const ClKernelWriter &) = default;
+ ClKernelWriter &operator=(const ClKernelWriter &) = default;
+
+ // A IdSpaced ID is a term used to describe a fragment that is registered in ICode to ensure
+ // there are no conflicts or ambiguity in the code
+ void set_IdSpace(int32_t id) override
+ {
+ _data->tiles.set_IdSpace(id);
+ _data->arguments.set_IdSpace(id);
+ }
+
+ void import_tile(const std::string& dst_name, const IVectorTile *src) override
+ {
+ _data->tiles.insert(dst_name, src);
+ }
+
+ void declare_argument(const std::string& name, const TensorInfo& tensor) override
+ {
+ assert(_data->arguments[name] == nullptr);
+ _data->arguments.insert(name, tensor, _attr->return_tensor_component_by_value);
+ }
+
+ void declare_tile(const std::string& name, const TileInfo& format) override
+ {
+ assert(_data->tiles[name] == nullptr);
+ _data->tiles.insert(name, format);
+
+ IVectorTile *x = _data->tiles[name];
+
+ for(auto &t : x->underlying_source_variables())
+ {
+ _data->code += t.type.str + " " + t.str + ";\n";
+ }
+ }
+
+ void declare_const_tile(const std::string& name, const std::vector<std::vector<std::string>>& in, DataType dt) override
+ {
+ assert(_data->tiles[name] == nullptr);
+ _data->tiles.insert(name, in, dt);
+ // Note: A constant does not need to be declared in the code
+ }
+
+ void write_text(const std::string& x) override
+ {
+ _data->code += x;
+ }
+
+ void compound_statement_begin() override
+ {
+ _data->tiles.increment_registry_level();
+ _data->code += "{\n";
+ }
+
+ void compound_statement_end() override
+ {
+ _data->tiles.decrement_registry_level();
+ _data->code += "}\n";
+ }
+
+ void op_get_global_id(const Operand& dst_var, int32_t dim) override
+ {
+ assert(dst_var.type() == OperandType::Tile);
+ assert(_data->tiles.has_tile(dst_var.value()));
+ assert(_data->tiles[dst_var.value()]->format().w == 1 &&
+ _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable
+
+ auto var = _data->tiles[dst_var.value()];
+
+ _data->code += var->scalar(0, 0).str;
+ _data->code += " = get_global_id(";
+ _data->code += std::to_string(dim);
+ _data->code += ");\n";
+ };
+
+ void op_get_global_coord(const Operand& o_dst, const Operand& o_step, const TensorOperand& o_tensor, int32_t dim) override
+ {
+ OperandUnpacker operands(_data->tiles, _data->arguments);
+ auto dst = operands.unpack(o_dst);
+ auto step = operands.unpack(o_step);
+
+ // Validation: Check that x, y and z are scalar
+
+ TensorOperandUnpacker tensor_operands(_data->arguments);
+ auto tensor = tensor_operands.unpack(o_tensor);
+ auto gpu_sampler = o_tensor.sampler();
+
+ GpuTensor3dMapper mapper(tensor, gpu_sampler);
+
+ switch (dim)
+ {
+ case 0:
+ if(mapper.is_one_component_x())
+ {
+ _data->code += dst->scalar(0, 0).str;
+ _data->code += " = 0;\n";
+ }
+ else
+ {
+ if(mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin)
+ {
+ // Validation: Check: fixed tensor shape
+ // TO BE CHANGED
+ _data->code += dst->scalar(0, 0).str;
+ _data->code += " = get_global_id(0) * ";
+ _data->code += step->scalar(0, 0).str;
+ _data->code += ";\n";
+ }
+ else
+ {
+ _data->code += dst->scalar(0, 0).str;
+ _data->code += " = get_global_id(0) * ";
+ _data->code += step->scalar(0, 0).str;
+ _data->code += ";\n";
+ }
+ }
+ break;
+ case 1:
+ if(mapper.is_one_component_y())
+ {
+ _data->code += dst->scalar(0, 0).str;
+ _data->code += " = 0;\n";
+ }
+ else
+ {
+ if(mapper.gpu_sampler().address_mode_y == TensorSamplerAddressModeY::OverlappingMin)
+ {
+
+ }
+ else
+ {
+ _data->code += dst->scalar(0, 0).str;
+ _data->code += " = get_global_id(1) * ";
+ _data->code += step->scalar(0, 0).str;
+ _data->code += ";\n";
+ }
+ }
+ break;
+ case 2:
+ if(mapper.is_one_component_z())
+ {
+ _data->code += dst->scalar(0, 0).str;
+ _data->code += " = 0;\n";
+ }
+ else
+ {
+ _data->code += dst->scalar(0, 0).str;
+ _data->code += " = get_global_id(2) * ";
+ _data->code += step->scalar(0, 0).str;
+ _data->code += ";\n";
+ }
+ break;
+ default:
+ break;
+ }
+ };
+
+ void op_get_global_batch(const Operand& o_dst, const TensorOperand& o_tensor) override
+ {
+ OperandUnpacker operands(_data->tiles, _data->arguments);
+ auto dst = operands.unpack(o_dst);
+
+ TensorOperandUnpacker tensor_operands(_data->arguments);
+ auto tensor = tensor_operands.unpack(o_tensor);
+ auto gpu_sampler = o_tensor.sampler();
+
+ GpuTensor3dMapper mapper(tensor, gpu_sampler);
+
+ if(mapper.is_one_component_batch())
+ {
+ _data->code += dst->scalar(0, 0).str;
+ _data->code += " = 0;\n";
+ }
+ else
+ {
+ std::cout << "Unsupported batched computation" << std::endl;
+ assert(false);
+ }
+ };
+
+ void op_get_global_size(const Operand& dst_var, int32_t dim) override
+ {
+ assert(dst_var.type() == OperandType::Tile);
+ assert(_data->tiles.has_tile(dst_var.value()));
+ assert(_data->tiles[dst_var.value()]->format().w == 1 &&
+ _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable
+
+ auto var = _data->tiles[dst_var.value()];
+
+ _data->code += var->scalar(0, 0).str;
+ _data->code += " = get_global_size(";
+ _data->code += std::to_string(dim);
+ _data->code += ");\n";
+ }
+
+ void op_binary_expression(const Operand& dst_name, const Operand& lhs_name, BinaryOp op, const Operand& rhs_name) override
+ {
+ OperandUnpacker operands(_data->tiles, _data->arguments);
+ auto lhs = operands.unpack(lhs_name);
+ auto rhs = operands.unpack(rhs_name);
+ auto dst = operands.unpack(dst_name);
+
+ const int32_t dst_w = dst->format().w;
+ const int32_t dst_h = dst->format().h;
+ assert(lhs != nullptr);
+ const int32_t lhs_w = lhs->format().w;
+ const int32_t rhs_w = rhs->format().w;
+
+ if(op == BinaryOp::MatMul_Nt_T)
+ {
+ assert((dst->format().dt == DataType::Fp32) || (dst->format().dt == DataType::Fp16));
+ for(int32_t y = 0; y < dst_h; ++y)
+ {
+ for(int32_t x = 0; x < dst_w; ++x)
+ {
+ for(int32_t k = 0; k < lhs_w; ++k)
+ {
+ _data->code += dst->scalar(x, y).str;
+ _data->code += " = fma(";
+ _data->code += lhs->scalar(k, y).str;
+ _data->code += ", ";
+ _data->code += rhs->scalar(k, x).str;
+ _data->code += ", ";
+ _data->code += dst->scalar(x, y).str;
+ _data->code += ");\n";
+ }
+ }
+ }
+
+ return;
+ }
+
+ bool broadcast_lhs_x = dst_w != 1 && lhs_w == 1;
+ bool broadcast_rhs_x = dst_w != 1 && rhs_w == 1;
+
+ std::string lhs_prefix = broadcast_lhs_x? "(" + dst->underlying_source_variables()[0].type.str + ")" : "";
+ std::string rhs_prefix = broadcast_rhs_x? "(" + dst->underlying_source_variables()[0].type.str + ")" : "";
+ std::string op_str = to_string(op);
+
+ // Broadcasting on Y is automatic
+ for(int32_t y = 0; y < dst_h; ++y)
+ {
+ _data->code += dst->vector(y).str;
+ _data->code += " = ";
+ _data->code += lhs_prefix + lhs->vector(y).str;
+ _data->code += " ";
+ _data->code += op_str;
+ _data->code += " ";
+ _data->code += rhs_prefix + rhs->vector(y).str;
+ _data->code += ";\n";
+ }
+ };
+
+ void op_cast_expression(const Operand& o_dst, const Operand &o_src, ConvertPolicy policy) override
+ {
+ CKW_UNUSED(policy);
+
+ OperandUnpacker operands(_data->tiles, _data->arguments);
+ auto src = operands.unpack(o_src);
+ auto dst = operands.unpack(o_dst);
+
+ // const int32_t dst_w = dst->format().w;
+ const int32_t dst_h = dst->format().h;
+ const std::string dt = dst->scalar(0, 0).type.str;
+
+ // Broadcasting on Y is automatic
+ for(int32_t y = 0; y < dst_h; ++y)
+ {
+ _data->code += dst->vector(y).str;
+ _data->code += " = convert_" + dt + "(";
+ _data->code += src->vector(y).str;
+ _data->code += ");\n";
+ }
+ };
+
+ void op_assign(const Operand& dst_name, const Operand& src_name) override
+ {
+ OperandUnpacker operands(_data->tiles, _data->arguments);
+ auto src = operands.unpack(src_name);
+ auto dst = operands.unpack(dst_name);
+
+ const int32_t dst_w = dst->format().w;
+ const int32_t dst_h = dst->format().h;
+ const int32_t src_w = src->format().w;
+ // const int32_t src_h = src->format().h;
+ const std::string dt = dst->scalar(0, 0).type.str;
+
+ bool broadcast_src_x = dst_w != 1 && src_w == 1;
+
+ std::string src_prefix = broadcast_src_x? "(" + dt + ")" : "";
+
+ // Broadcasting on Y is automatic
+ for(int32_t y = 0; y < dst_h; ++y)
+ {
+ _data->code += dst->vector(y).str;
+ _data->code += " = ";
+ _data->code += src_prefix + src->vector(y).str;
+ _data->code += ";\n";
+ }
+ }
+
+ void op_scalar_function(const Operand& dst_name, const Operand& src_name, ScalarUnaryFunction func) override
+ {
+ OperandUnpacker operands(_data->tiles, _data->arguments);
+ auto src = operands.unpack(src_name);
+ auto dst = operands.unpack(dst_name);
+
+ const int32_t dst_w = dst->format().w;
+ const int32_t dst_h = dst->format().h;
+ const int32_t src_w = src->format().w;
+ // const int32_t src_h = src->format().h;
+ const std::string dt = dst->scalar(0, 0).type.str;
+
+ bool broadcast_src_x = dst_w != 1 && src_w == 1;
+
+ std::string src_prefix = broadcast_src_x? "(" + dt + ")" : "";
+
+ // Broadcasting on Y is automatic
+ for(int32_t y = 0; y < dst_h; ++y)
+ {
+ _data->code += dst->vector(y).str;
+ _data->code += " = ";
+
+ switch(func)
+ {
+ case ScalarUnaryFunction::Exp:
+ _data->code += "exp(";
+ break;
+
+ default:
+ CKW_ASSERT(false);
+ }
+
+ _data->code += src_prefix + src->vector(y).str;
+ _data->code += ");\n";
+ }
+ }
+
+ void op_if(const Operand& o_lhs, BinaryOp op, const Operand& o_rhs) override
+ {
+ OperandUnpacker operands(_data->tiles, _data->arguments);
+ auto lhs = operands.unpack(o_lhs);
+ auto rhs = operands.unpack(o_rhs);
+
+ assert(is_tile_scalar(lhs));
+ assert(is_tile_scalar(rhs));
+
+ _data->code += "if(";
+ _data->code += lhs->scalar(0, 0).str;
+ _data->code += " ";
+ _data->code += to_string(op);
+ _data->code += " ";
+ _data->code += rhs->scalar(0, 0).str;
+ _data->code += ")\n";
+ }
+
+ void op_for_loop(const Operand& var_name, BinaryOp cond_op, const Operand& cond_value_name, AssignmentOp update_op, const Operand& update_value_name) override
+ {
+ OperandUnpacker operands(_data->tiles, _data->arguments);
+ auto var = operands.unpack(var_name);
+ auto cond_value = operands.unpack(cond_value_name);
+ auto update_value = operands.unpack(update_value_name);
+
+ const int32_t dst_w = var->format().w;
+ const int32_t dst_h = var->format().h;
+
+ // It must be a scalar variable
+ assert(dst_w == 1);
+ assert(dst_h == 1);
+
+ _data->code += "for(; " ;
+ _data->code += var->scalar(0, 0).str;
+ _data->code += " ";
+ _data->code += to_string(cond_op);
+ _data->code += " " + cond_value->scalar(0, 0).str + "; ";
+ _data->code += var->scalar(0, 0).str;
+ _data->code += " ";
+ _data->code += to_string(update_op);
+ _data->code += " " + update_value->scalar(0, 0).str + ")";
+ _data->code += "\n";
+ }
+
+ void op_load_immediate(const TensorOperand& o_tensor, const Operand& o_dst, const Operand& o_x, const Operand& o_y, const Operand& o_z, const Operand& o_batch_idx, const Operand& dilation_y) override
+ {
+ OperandUnpacker operands(_data->tiles, _data->arguments);
+ auto dst = operands.unpack(o_dst);
+ auto x = operands.unpack(o_x);
+ auto y = operands.unpack(o_y);
+ auto z = operands.unpack(o_z);
+ auto dil_y = operands.unpack(dilation_y);
+ auto b = operands.unpack(o_batch_idx);
+
+ TensorOperandUnpacker tensor_operands(_data->arguments);
+ auto tensor = tensor_operands.unpack(o_tensor);
+ auto gpu_sampler = o_tensor.sampler();
+
+ GpuTensor3dMapper mapper(tensor, gpu_sampler);
+
+ auto load_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Load);
+
+ // Initialize the constant part
+ load_writer->initialize(dst, x, z, b);
+
+ for(int i = 0; i < dst->format().h; ++i)
+ {
+ std::string coord_y = y->scalar(0, 0).str + " + " + std::to_string(i);
+ if(dil_y->scalar(0, 0).str != "1")
+ {
+ coord_y += " * " + dil_y->scalar(0, 0).str;
+ }
+ load_writer->write(std::make_pair(i, coord_y));
+ }
+
+ load_writer->finalize();
+ }
+
+ void op_load_indirect(const TensorOperand& o_tensor, const Operand& o_dst, const Operand& o_x, const Operand& o_indirect_h, const Operand& o_z, const Operand& o_batch_idx) override
+ {
+ OperandUnpacker operands(_data->tiles, _data->arguments);
+ auto dst = operands.unpack(o_dst);
+ auto x = operands.unpack(o_x);
+ auto y_ind = operands.unpack(o_indirect_h);
+ auto z = operands.unpack(o_z);
+ auto b = operands.unpack(o_batch_idx);
+
+ TensorOperandUnpacker tensor_operands(_data->arguments);
+ auto tensor = tensor_operands.unpack(o_tensor);
+ auto gpu_sampler = o_tensor.sampler();
+
+ GpuTensor3dMapper mapper(tensor, gpu_sampler);
+
+ auto load_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Load);
+
+ // Initialize the constant part
+ load_writer->initialize(dst, x, z, b);
+
+ for(int i = 0; i < dst->format().h; ++i)
+ {
+ load_writer->write(std::make_pair(i, y_ind->scalar(0, i).str));
+ }
+
+ load_writer->finalize();
+ }
+
+ void op_store_immediate(const TensorOperand& tensor_name, const Operand& src_name, const Operand& x_name, const Operand& y_name, const Operand& z_name, const Operand& batch_index_name) override
+ {
+ OperandUnpacker operands(_data->tiles, _data->arguments);
+ auto src = operands.unpack(src_name);
+ auto x = operands.unpack(x_name);
+ auto y = operands.unpack(y_name);
+ auto z = operands.unpack(z_name);
+ auto b = operands.unpack(batch_index_name);
+
+ TensorOperandUnpacker tensor_operands(_data->arguments);
+ auto tensor = tensor_operands.unpack(tensor_name);
+ auto gpu_sampler = tensor_name.sampler();
+
+ GpuTensor3dMapper mapper(tensor, gpu_sampler);
+
+ auto store_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Store);
+
+ // Initialize the constant part
+ store_writer->initialize(src, x, z, b);
+
+ int32_t tile_h = src->format().h;
+
+ for(int m0 = tile_h - 1; m0 >= 0; m0--)
+ {
+ store_writer->write(std::make_pair(m0, y->scalar(0, 0).str + " + " + std::to_string(m0)));
+ }
+
+ store_writer->finalize();
+ }
+
+ void op_return() override
+ {
+ _data->code += "return;\n";
+ }
+
+ void util_get_indirect_buffer(const Operand& o_dst, const TensorOperand& o_tensor, const Operand& o_x, const Operand& o_y, const Operand& o_x_off, const Operand& o_y_off) override
+ {
+ OperandUnpacker operands(_data->tiles, _data->arguments);
+ auto dst = operands.unpack(o_dst);
+ auto x = operands.unpack(o_x);
+ auto y = operands.unpack(o_y);
+ auto x_off = operands.unpack(o_x_off);
+ auto y_off = operands.unpack(o_y_off);
+
+ TensorOperandUnpacker tensor_operands(_data->arguments);
+ auto tensor = tensor_operands.unpack(o_tensor);
+
+ assert(dst->format().w == 1);
+ assert(x->format().w == 1);
+ assert(y->format().w == 1);
+ assert(x_off->format().w == 1);
+ assert(y_off->format().w == 1);
+ assert(dst->format().dt == DataType::Int32);
+ assert(x->format().dt == DataType::Int32);
+ assert(y->format().dt == DataType::Int32);
+ assert(x_off->format().dt == DataType::Int32);
+ assert(y_off->format().dt == DataType::Int32);
+
+ const std::string width = tensor->component(TensorComponent::W);
+ const std::string height = tensor->component(TensorComponent::H);
+ const std::string wxh = tensor->component(TensorComponent::WxH);
+ /*
+ int x_s;
+ int y_s;
+ x_s = (xi_0 + x_k);
+ y_s = (yi_0 + y_k);
+ mi_0 = x_s + y_s * width + b * widthxheight;
+ mi_0 = select(-1, mi_0, x_s >= 0);
+ mi_0 = select(-1, mi_0, y_s >= 0);
+ mi_0 = select(-1, mi_0, x_s < 128);
+ mi_0 = select(-1, mi_0, y_s < 128);
+ */
+ compound_statement_begin();
+ declare_tile("_x_s", TileInfo(DataType::Int32));
+ declare_tile("_y_s", TileInfo(DataType::Int32));
+ auto x_s = operands.unpack(Operand("_x_s"));
+ auto y_s = operands.unpack(Operand("_y_s"));
+ for(int i = 0; i < dst->format().h; ++i)
+ {
+ // x_s = (xi_0 + x_k);
+ // y_s = (yi_0 + y_k);
+ _data->code += x_s->scalar(0, i).str;
+ _data->code += " = (";
+ _data->code += x->scalar(0, i).str;
+ _data->code += " + ";
+ _data->code += x_off->scalar(0, i).str;
+ _data->code += ");\n";
+ _data->code += y_s->scalar(0, i).str;
+ _data->code += " = (";
+ _data->code += y->scalar(0, i).str;
+ _data->code += " + ";
+ _data->code += y_off->scalar(0, i).str;
+ _data->code += ");\n";
+ // mi_0 = x_s + y_s * width;
+ _data->code += dst->scalar(0, i).str;
+ _data->code += " = ";
+ _data->code += x_s->scalar(0, i).str;
+ _data->code += " + ";
+ _data->code += y_s->scalar(0, i).str;
+ _data->code += " * " + width + ";\n";
+ // mi_0 = select(wxh, mi_0, x_s >= 0);
+ _data->code += dst->scalar(0, i).str;
+ _data->code += " = select(-1, ";
+ _data->code += dst->scalar(0, i).str;
+ _data->code += ", ";
+ _data->code += x_s->scalar(0, i).str;
+ _data->code += " >= 0);\n";
+ // mi_0 = select(wxh, mi_0, y_s >= 0);
+ _data->code += dst->scalar(0, i).str;
+ _data->code += " = select(-1, ";
+ _data->code += dst->scalar(0, i).str;
+ _data->code += ", ";
+ _data->code += y_s->scalar(0, i).str;
+ _data->code += " >= 0);\n";
+ // mi_0 = select(wxh, mi_0, x_s < width);
+ _data->code += dst->scalar(0, i).str;
+ _data->code += " = select(-1, ";
+ _data->code += dst->scalar(0, i).str;
+ _data->code += ", ";
+ _data->code += x_s->scalar(0, i).str;
+ _data->code += " < ";
+ _data->code += width + ");\n";
+ // mi_0 = select(wxh, mi_0, y_s < height);
+ _data->code += dst->scalar(0, i).str;
+ _data->code += " = select(-1, ";
+ _data->code += dst->scalar(0, i).str;
+ _data->code += ", ";
+ _data->code += y_s->scalar(0, i).str;
+ _data->code += " < ";
+ _data->code += height + ");\n";
+ }
+ compound_statement_end();
+ }
+
+private:
+ GpuKernelWriterDataHolder* _data { nullptr };
+ GpuKernelWriterAttribute * _attr { nullptr };
+};
+
+/** IGpuKernelWriter factory class */
+class GpuKernelWriterFactory final
+{
+public:
+ /** Static method to call the IGpuKernelWriter class accordingly with the Gpu programming language
+ *
+ * @param[in] gpu GPU target
+ *
+ * @return IGpuKernelWriter
+ */
+ static std::unique_ptr<IGpuKernelWriter> create(GpuKernelWriterAttribute *attr, GpuKernelWriterDataHolder *x)
+ {
+ switch(x->programming_language())
+ {
+ case GpuTargetLanguage::OpenCL:
+ return std::make_unique<ClKernelWriter>(attr, x);
+ default:
+ std::cout << "Unsupported Gpu programming language" << std::endl;
+ assert(false);
+ }
+ }
+};
+
+inline int32_t adjust_step(TensorSamplerFormat tensor_format, int32_t step, const TensorInfo *tensor_info_id, int32_t idx)
+{
+ auto tensor = tensor_info_id->shape;
+
+ int32_t dim[3] = {0};
+
+ switch(tensor_format)
+ {
+ case TensorSamplerFormat::C_W_H:
+ dim[0] = tensor[0];
+ dim[1] = tensor[1];
+ dim[2] = tensor[2];
+ break;
+ case TensorSamplerFormat::C_WH_1:
+ dim[0] = tensor[0];
+ dim[1] = tensor[1] * tensor[2];
+ dim[2] = 1;
+ break;
+ default:
+ std::cout << "Unsupported tensor format" << std::endl;
+ assert(false);
+ break;
+ }
+
+ return std::min(step, dim[idx]);
+}
+
+} // namespace prototype
+} // namespace ckw
+
+#endif // CKW_SRC_PROTOTYPE_H
diff --git a/compute_kernel_writer/src/TensorOperand.cpp b/compute_kernel_writer/src/TensorOperand.cpp
new file mode 100644
index 0000000000..00ecc3824e
--- /dev/null
+++ b/compute_kernel_writer/src/TensorOperand.cpp
@@ -0,0 +1,247 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ckw/TensorOperand.h"
+#include "ckw/Error.h"
+#include "ckw/Kernel.h"
+#include "ckw/TileOperand.h"
+#include "src/Prototype.h"
+
+namespace ckw
+{
+
+namespace
+{
+
+inline TensorComponentOperand &get_or_create_component(std::unique_ptr<TensorComponentOperand> &ptr, const ::std::string &name, TensorComponent component)
+{
+ if(ptr == nullptr)
+ {
+ ptr = std::make_unique<TensorComponentOperand>(name, component);
+ }
+
+ return *ptr;
+}
+
+} // namespace
+
+// =================================================================================================
+// TensorOperand
+// =================================================================================================
+
+TensorOperand::TensorOperand(const std::string &name, const TensorInfo &info)
+ : OperandBase(name), _info(info)
+{
+}
+
+prototype::Operand TensorOperand::create_impl_operand(prototype::IGpuKernelWriter *writer) const
+{
+ CKW_UNUSED(writer);
+ return { name() };
+}
+
+const TensorInfo &TensorOperand::info() const
+{
+ return _info;
+}
+
+TensorInfo &TensorOperand::info()
+{
+ return _info;
+}
+
+DataType TensorOperand::data_type() const
+{
+ return _info.data_type();
+}
+
+bool TensorOperand::is_constant() const
+{
+ return false;
+}
+
+const TileOperand &TensorOperand::tile() const
+{
+ return *_tile;
+}
+
+TileOperand &TensorOperand::tile()
+{
+ return *_tile;
+}
+
+TensorOperand &TensorOperand::tile(TileOperand &tile)
+{
+ _tile = &tile;
+ return *this;
+}
+
+const TensorTileSampler &TensorOperand::tile_sampler() const
+{
+ return _tile_sampler;
+}
+
+TensorTileSampler &TensorOperand::tile_sampler()
+{
+ return _tile_sampler;
+}
+
+TensorOperand &TensorOperand::tile_sampler(const TensorTileSampler &value)
+{
+ _tile_sampler = value;
+ return *this;
+}
+
+TileOperand &TensorOperand::stride1()
+{
+ return get_or_create_component(_stride1, name(), TensorComponent::Stride1);
+}
+
+TileOperand &TensorOperand::stride2()
+{
+ return get_or_create_component(_stride2, name(), TensorComponent::Stride2);
+}
+
+TileOperand &TensorOperand::stride3()
+{
+ return get_or_create_component(_stride3, name(), TensorComponent::Stride3);
+}
+
+TileOperand &TensorOperand::stride4()
+{
+ return get_or_create_component(_stride4, name(), TensorComponent::Stride4);
+}
+
+TileOperand &TensorOperand::dim0()
+{
+ return get_or_create_component(_dim0, name(), TensorComponent::Dim0);
+}
+
+TileOperand &TensorOperand::dim1()
+{
+ return get_or_create_component(_dim1, name(), TensorComponent::Dim1);
+}
+
+TileOperand &TensorOperand::dim2()
+{
+ return get_or_create_component(_dim2, name(), TensorComponent::Dim2);
+}
+
+TileOperand &TensorOperand::dim3()
+{
+ return get_or_create_component(_dim3, name(), TensorComponent::Dim3);
+}
+
+TileOperand &TensorOperand::dim4()
+{
+ return get_or_create_component(_dim4, name(), TensorComponent::Dim4);
+}
+
+TileOperand &TensorOperand::dim1_dim2()
+{
+ return get_or_create_component(_dim1_dim2, name(), TensorComponent::Dim1xDim2);
+}
+
+TileOperand &TensorOperand::dim1_dim2_dim3()
+{
+ return get_or_create_component(_dim1_dim2_dim3, name(), TensorComponent::Dim1xDim2xDim3);
+}
+
+TileOperand &TensorOperand::offset_first_element_in_bytes()
+{
+ return get_or_create_component(_offset_first_element_in_bytes, name(), TensorComponent::OffsetFirstElement);
+}
+
+// =================================================================================================
+// TensorComponentOperand
+// =================================================================================================
+
+TensorComponentOperand::TensorComponentOperand(const ::std::string &name, TensorComponent component)
+ : TileOperand(name, DataType::Int32), _component(component)
+{
+}
+
+prototype::Operand TensorComponentOperand::create_impl_operand(prototype::IGpuKernelWriter *writer) const
+{
+ CKW_UNUSED(writer);
+ prototype::OperandType type{ prototype::OperandType::Unknown };
+
+ switch(_component)
+ {
+ case TensorComponent::OffsetFirstElement:
+ type = prototype::OperandType::TensorDataOffset;
+ break;
+
+ case TensorComponent::Stride1:
+ type = prototype::OperandType::TensorStride1;
+ break;
+
+ case TensorComponent::Stride2:
+ type = prototype::OperandType::TensorStride2;
+ break;
+
+ case TensorComponent::Stride3:
+ type = prototype::OperandType::TensorStride3;
+ break;
+
+ case TensorComponent::Stride4:
+ type = prototype::OperandType::TensorStride4;
+ break;
+
+ case TensorComponent::Dim0:
+ type = prototype::OperandType::TensorDim0;
+ break;
+
+ case TensorComponent::Dim1:
+ type = prototype::OperandType::TensorDim1;
+ break;
+
+ case TensorComponent::Dim2:
+ type = prototype::OperandType::TensorDim2;
+ break;
+
+ case TensorComponent::Dim3:
+ type = prototype::OperandType::TensorDim3;
+ break;
+
+ case TensorComponent::Dim4:
+ type = prototype::OperandType::TensorDim4;
+ break;
+
+ case TensorComponent::Dim1xDim2:
+ type = prototype::OperandType::TensorDim1xDim2;
+ break;
+
+ case TensorComponent::Dim1xDim2xDim3:
+ type = prototype::OperandType::TensorDim1xDim2xDim3;
+ break;
+
+ default:
+ CKW_ASSERT(false);
+ }
+
+ return prototype::Operand(name(), type);
+}
+
+} // namespace ckw
diff --git a/compute_kernel_writer/src/TensorTileSampler.cpp b/compute_kernel_writer/src/TensorTileSampler.cpp
new file mode 100644
index 0000000000..143d550dec
--- /dev/null
+++ b/compute_kernel_writer/src/TensorTileSampler.cpp
@@ -0,0 +1,167 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ckw/TensorTileSampler.h"
+#include "ckw/TileOperand.h"
+#include "ckw/Types.h"
+
+namespace ckw
+{
+
+TensorTileSampler::TensorTileSampler()
+{
+}
+
+TensorTileSampler::TensorTileSampler(
+ TileOperand &x, TileOperand &y, TileOperand &z, TileOperand &b,
+ TensorSamplerFormat format,
+ TensorSamplerAddressModeX address_mode_x,
+ TensorSamplerAddressModeY address_mode_y,
+ TensorSamplerAddressModeZ address_mode_z)
+ : _x(&x), _y(&y), _z(&z), _b(&b), _height(0), _width(0), _format(format), _address_mode_x(address_mode_x), _address_mode_y(address_mode_y), _address_mode_z(address_mode_z)
+{
+}
+
+TensorTileSampler::TensorTileSampler(
+ TileOperand &x, TileOperand &y, TileOperand &z, TileOperand &b,
+ int32_t height, int32_t width,
+ TensorSamplerFormat format,
+ TensorSamplerAddressModeX address_mode_x,
+ TensorSamplerAddressModeY address_mode_y,
+ TensorSamplerAddressModeZ address_mode_z)
+ : _x(&x), _y(&y), _z(&z), _b(&b), _height(height), _width(width), _format(format), _address_mode_x(address_mode_x), _address_mode_y(address_mode_y), _address_mode_z(address_mode_z)
+{
+}
+
+const TileOperand &TensorTileSampler::x() const
+{
+ return *_x;
+}
+
+TensorTileSampler &TensorTileSampler::x(TileOperand &x)
+{
+ _x = &x;
+ return *this;
+}
+
+const TileOperand &TensorTileSampler::y() const
+{
+ return *_y;
+}
+
+TensorTileSampler &TensorTileSampler::y(TileOperand &y)
+{
+ _y = &y;
+ return *this;
+}
+
+const TileOperand &TensorTileSampler::z() const
+{
+ return *_z;
+}
+
+TensorTileSampler &TensorTileSampler::z(TileOperand &z)
+{
+ _z = &z;
+ return *this;
+}
+
+const TileOperand &TensorTileSampler::b() const
+{
+ return *_b;
+}
+
+TensorTileSampler &TensorTileSampler::b(TileOperand &b)
+{
+ _b = &b;
+ return *this;
+}
+
+int32_t TensorTileSampler::width() const
+{
+ return _width;
+}
+
+TensorTileSampler &TensorTileSampler::width(int32_t width)
+{
+ _width = width;
+ return *this;
+}
+
+int32_t TensorTileSampler::height() const
+{
+ return _height;
+}
+
+TensorTileSampler &TensorTileSampler::height(int32_t height)
+{
+ _height = height;
+ return *this;
+}
+
+TensorSamplerFormat TensorTileSampler::format() const
+{
+ return _format;
+}
+
+TensorTileSampler &TensorTileSampler::format(TensorSamplerFormat format)
+{
+ _format = format;
+ return *this;
+}
+
+TensorSamplerAddressModeX TensorTileSampler::address_mode_x() const
+{
+ return _address_mode_x;
+}
+
+TensorTileSampler &TensorTileSampler::address_mode_x(TensorSamplerAddressModeX address_mode_x)
+{
+ _address_mode_x = address_mode_x;
+ return *this;
+}
+
+TensorSamplerAddressModeY TensorTileSampler::address_mode_y() const
+{
+ return _address_mode_y;
+}
+
+TensorTileSampler &TensorTileSampler::address_mode_y(TensorSamplerAddressModeY address_mode_y)
+{
+ _address_mode_y = address_mode_y;
+ return *this;
+}
+
+TensorSamplerAddressModeZ TensorTileSampler::address_mode_z() const
+{
+ return _address_mode_z;
+}
+
+TensorTileSampler &TensorTileSampler::address_mode_z(TensorSamplerAddressModeZ address_mode_z)
+{
+ _address_mode_z = address_mode_z;
+ return *this;
+}
+
+} // namespace ckw
diff --git a/compute_kernel_writer/src/TileInfo.cpp b/compute_kernel_writer/src/TileInfo.cpp
index 6dd1957a7a..7d8b2654ef 100644
--- a/compute_kernel_writer/src/TileInfo.cpp
+++ b/compute_kernel_writer/src/TileInfo.cpp
@@ -36,7 +36,7 @@ TileInfo::TileInfo(DataType dt, int32_t w)
{
}
-TileInfo::TileInfo(DataType dt, int32_t w, int32_t h)
+TileInfo::TileInfo(DataType dt, int32_t h, int32_t w)
: _dt(dt), _shape({{w, h}})
{
}
diff --git a/compute_kernel_writer/src/TileOperand.cpp b/compute_kernel_writer/src/TileOperand.cpp
new file mode 100644
index 0000000000..091947628d
--- /dev/null
+++ b/compute_kernel_writer/src/TileOperand.cpp
@@ -0,0 +1,104 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ckw/TileOperand.h"
+#include "ckw/Error.h"
+#include "src/Prototype.h"
+
+namespace ckw
+{
+
+TileOperand::TileOperand(const std::string &name, const TileInfo &info)
+ : OperandBase(name), _info(info), _value{ 0 }, _constant(false)
+{
+}
+
+TileOperand::TileOperand(const std::string &name, DataType data_type)
+ : OperandBase(name), _info(TileInfo{ data_type }), _value(0), _constant(false)
+{
+}
+
+TileOperand::TileOperand(const std::string &name, int32_t value)
+ : OperandBase(name), _info(TileInfo{ DataType::Int32 }), _value(value), _constant(true)
+{
+}
+
+TileOperand::TileOperand(const std::string &name, float value)
+ : OperandBase(name), _info(TileInfo{ DataType::Fp32 }), _value(value), _constant(true)
+{
+}
+
+prototype::Operand TileOperand::create_impl_operand(prototype::IGpuKernelWriter *writer) const
+{
+ CKW_UNUSED(writer);
+
+ if(_constant)
+ {
+ switch(_info.data_type())
+ {
+ case DataType::Int32:
+ return prototype::Operand(std::to_string(_value.get<int32_t>()), prototype::OperandType::ScalarInt32);
+
+ case DataType::Fp32:
+ return prototype::Operand(std::to_string(_value.get<float>()), prototype::OperandType::ScalarFp32);
+
+ default:
+ CKW_ASSERT(false);
+ }
+ }
+ else
+ {
+ return prototype::Operand(name(), prototype::OperandType::Tile);
+ }
+}
+
+const TileInfo &TileOperand::tile_info() const
+{
+ return _info;
+}
+
+DataType TileOperand::data_type() const
+{
+ return _info.data_type();
+}
+
+bool TileOperand::is_constant() const
+{
+ return _constant;
+}
+
+bool TileOperand::is_scalar() const
+{
+ return _info.width() == 1 && _info.height() == 1;
+}
+
+ScalarValue TileOperand::scalar_value() const
+{
+ CKW_ASSERT(is_scalar());
+ CKW_ASSERT(is_constant());
+
+ return _value;
+}
+
+} // namespace ckw
diff --git a/compute_kernel_writer/src/acl/AclComponentArgument.cpp b/compute_kernel_writer/src/acl/AclComponentArgument.cpp
new file mode 100644
index 0000000000..5cb909021e
--- /dev/null
+++ b/compute_kernel_writer/src/acl/AclComponentArgument.cpp
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "acl/AclComponentArgument.h"
+#include "ckw/Error.h"
+
+AclComponentArgument::AclComponentArgument()
+{
+}
+
+AclComponentArgument::AclComponentArgument(ckw::TensorOperand &tensor)
+ : _tensor(&tensor)
+{
+}
+
+AclComponentArgument &AclComponentArgument::init_virtual_tensor(ckw::TileOperand &tile, const ckw::TensorTileSampler &tile_sampler)
+{
+ CKW_ASSERT(_tile == nullptr);
+
+ _tile = &tile;
+ _tile_sampler = tile_sampler;
+
+ return *this;
+}
+
+bool AclComponentArgument::has_tensor() const
+{
+ return _tensor != nullptr;
+}
+
+ckw::TensorOperand &AclComponentArgument::tensor()
+{
+ CKW_ASSERT(_tensor != nullptr);
+
+ return *_tensor;
+}
+
+const ckw::TensorOperand &AclComponentArgument::tensor() const
+{
+ CKW_ASSERT(_tensor != nullptr);
+
+ return *_tensor;
+}
+
+bool AclComponentArgument::has_tile() const
+{
+ return _tile != nullptr;
+}
+
+ckw::TileOperand &AclComponentArgument::tile()
+{
+ CKW_ASSERT(_tile != nullptr);
+
+ return *_tile;
+}
+
+const ckw::TileOperand &AclComponentArgument::tile() const
+{
+ CKW_ASSERT(_tile != nullptr);
+
+ return *_tile;
+}
+
+ckw::TensorTileSampler &AclComponentArgument::tile_sampler()
+{
+ CKW_ASSERT(_tile != nullptr);
+
+ return _tile_sampler;
+}
+
+const ckw::TensorTileSampler &AclComponentArgument::tile_sampler() const
+{
+ CKW_ASSERT(_tile != nullptr);
+
+ return _tile_sampler;
+}
diff --git a/compute_kernel_writer/src/acl/AclKernelWriter.cpp b/compute_kernel_writer/src/acl/AclKernelWriter.cpp
new file mode 100644
index 0000000000..a44e798c61
--- /dev/null
+++ b/compute_kernel_writer/src/acl/AclKernelWriter.cpp
@@ -0,0 +1,50 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "acl/AclKernelWriter.h"
+#include "acl/AclComponentArgument.h"
+#include "ckw/Error.h"
+#include "ckw/TileInfo.h"
+
+AclKernelWriter::AclKernelWriter(ckw::Kernel &kernel)
+ : KernelWriter(kernel)
+{
+}
+
+void AclKernelWriter::op_load_once(AclComponentArgument *tensor_or_tile, const ckw::TensorTileSampler &sampler)
+{
+ if(!tensor_or_tile->has_tile())
+ {
+ CKW_ASSERT(tensor_or_tile->has_tensor());
+
+ auto &tensor = tensor_or_tile->tensor();
+
+ const auto tile_name = tensor.name() + "_tile";
+ auto &tile = declare_tile(tile_name.c_str(), ckw::TileInfo(tensor.data_type(), sampler.height(), sampler.width()));
+
+ op_load(tile, tensor, sampler);
+
+ tensor_or_tile->init_virtual_tensor(tile, sampler);
+ }
+}
diff --git a/compute_kernel_writer/src/acl/AclScopedKernelWriter.cpp b/compute_kernel_writer/src/acl/AclScopedKernelWriter.cpp
new file mode 100644
index 0000000000..2a73d47592
--- /dev/null
+++ b/compute_kernel_writer/src/acl/AclScopedKernelWriter.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "acl/AclScopedKernelWriter.h"
+#include "acl/AclKernelWriter.h"
+
+AclScopedKernelWriter::AclScopedKernelWriter(AclKernelWriter *writer)
+ : _writer(writer), _parent_id_space(writer->id_space())
+{
+ _writer->next_id_space();
+}
+
+AclScopedKernelWriter::AclScopedKernelWriter(const AclScopedKernelWriter &other)
+ : _writer(other._writer), _parent_id_space(other._writer->id_space())
+{
+ _writer->next_id_space();
+}
+
+AclKernelWriter *AclScopedKernelWriter::operator->()
+{
+ return _writer;
+}
+
+const AclKernelWriter *AclScopedKernelWriter::operator->() const
+{
+ return _writer;
+}
+
+AclKernelWriter *AclScopedKernelWriter::writer()
+{
+ return _writer;
+}
+
+const AclKernelWriter *AclScopedKernelWriter::writer() const
+{
+ return _writer;
+}