From ce3c48c7af02555f81c0f5e7ef2677916cecef34 Mon Sep 17 00:00:00 2001
From: Viet-Hoa Do
Date: Mon, 3 Jul 2023 13:44:43 +0100
Subject: Move CKW prototype to separate directory

Partially resolves: COMPMID-6283

Signed-off-by: Viet-Hoa Do
Change-Id: I7596e3dc357d6f0b9cbe66534523943a73c26d81
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9864
Reviewed-by: SiCong Li
Reviewed-by: Jakub Sujak
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
Benchmark: Arm Jenkins
---
 Android.bp | 33 +-
 SConscript | 2 +-
 SConstruct | 5 +-
 compute_kernel_writer/CMakeLists.txt | 26 +-
 compute_kernel_writer/README.md | 1 -
 compute_kernel_writer/examples/add_exp_store.cpp | 181 -
 .../include/acl/AclComponentArgument.h | 111 -
 .../include/acl/AclKernelWriter.h | 56 -
 .../include/acl/AclScopedKernelWriter.h | 62 -
 compute_kernel_writer/include/ckw/Error.h | 66 +-
 compute_kernel_writer/include/ckw/Kernel.h | 77 -
 compute_kernel_writer/include/ckw/KernelWriter.h | 217 --
 compute_kernel_writer/include/ckw/OperandBase.h | 76 -
 compute_kernel_writer/include/ckw/ScalarValue.h | 137 -
 compute_kernel_writer/include/ckw/TensorOperand.h | 181 -
 .../include/ckw/TensorTileSampler.h | 163 -
 compute_kernel_writer/include/ckw/TileOperand.h | 110 -
 compute_kernel_writer/include/ckw/Types.h | 119 +-
 compute_kernel_writer/prototype/CMakeLists.txt | 72 +
 .../prototype/examples/add_exp_store.cpp | 181 +
 .../examples/common/ExampleComponentArgument.cpp | 97 +
 .../examples/common/ExampleComponentArgument.h | 111 +
 .../examples/common/ExampleKernelWriter.cpp | 50 +
 .../examples/common/ExampleKernelWriter.h | 56 +
 .../examples/common/ExampleScopedKernelWriter.cpp | 58 +
 .../examples/common/ExampleScopedKernelWriter.h | 62 +
 .../prototype/include/ckw/Error.h | 79 +
 .../prototype/include/ckw/Kernel.h | 77 +
 .../prototype/include/ckw/KernelWriter.h | 217 ++
 .../prototype/include/ckw/OperandBase.h | 76 +
 .../prototype/include/ckw/ScalarValue.h | 137 +
 .../prototype/include/ckw/TensorInfo.h | 145 +
 .../prototype/include/ckw/TensorOperand.h | 181 +
 .../prototype/include/ckw/TensorTileSampler.h | 163 +
 .../prototype/include/ckw/TileInfo.h | 84 +
 .../prototype/include/ckw/TileOperand.h | 110 +
 .../prototype/include/ckw/Types.h | 140 +
 compute_kernel_writer/prototype/src/Kernel.cpp | 61 +
 .../prototype/src/KernelWriter.cpp | 227 ++
 .../prototype/src/OperandBase.cpp | 50 +
 compute_kernel_writer/prototype/src/Prototype.h | 3767 ++++++++++++++++++++
 compute_kernel_writer/prototype/src/TensorInfo.cpp | 77 +
 .../prototype/src/TensorOperand.cpp | 247 ++
 .../prototype/src/TensorTileSampler.cpp | 167 +
 compute_kernel_writer/prototype/src/TileInfo.cpp | 76 +
 .../prototype/src/TileOperand.cpp | 104 +
 compute_kernel_writer/src/Kernel.cpp | 61 -
 compute_kernel_writer/src/KernelWriter.cpp | 227 --
 compute_kernel_writer/src/OperandBase.cpp | 50 -
 compute_kernel_writer/src/Prototype.h | 3767 --------------------
 compute_kernel_writer/src/TensorOperand.cpp | 247 --
 compute_kernel_writer/src/TensorTileSampler.cpp | 167 -
 compute_kernel_writer/src/TileOperand.cpp | 104 -
 .../src/acl/AclComponentArgument.cpp | 97 -
 compute_kernel_writer/src/acl/AclKernelWriter.cpp | 50 -
 .../src/acl/AclScopedKernelWriter.cpp | 58 -
 filelist.json | 3 +
 scripts/clang_tidy_rules.py | 2 +-
 .../gpu/ckw_driver/GpuCkwComponentArgument.cpp | 108 +
 .../gpu/ckw_driver/GpuCkwComponentArgument.h | 122 +
 .../sketch/gpu/ckw_driver/GpuCkwDriver.cpp | 12 +-
 .../sketch/gpu/ckw_driver/GpuCkwKernelWriter.cpp | 61 +
 .../sketch/gpu/ckw_driver/GpuCkwKernelWriter.h | 67 +
 .../gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp | 69 +
 .../gpu/ckw_driver/GpuCkwScopedKernelWriter.h | 73 +
 .../sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp | 16 +-
 .../sketch/gpu/ckw_driver/GpuCkwVariableTable.h | 11 +-
 .../sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h | 5 +-
 .../components/GpuCkwElementwiseBinary.cpp | 23 +-
 .../components/GpuCkwElementwiseBinary.h | 3 +-
 .../gpu/ckw_driver/components/GpuCkwStore.cpp | 6 +-
 .../sketch/gpu/ckw_driver/components/GpuCkwStore.h | 2 +-
 .../gpu/ckw_driver/components/utils/WriterHelper.h | 8 +-
 73 files changed, 7462 insertions(+), 6452 deletions(-)
 delete mode 100644 compute_kernel_writer/examples/add_exp_store.cpp
 delete mode 100644 compute_kernel_writer/include/acl/AclComponentArgument.h
 delete mode 100644 compute_kernel_writer/include/acl/AclKernelWriter.h
 delete mode 100644 compute_kernel_writer/include/acl/AclScopedKernelWriter.h
 delete mode 100644 compute_kernel_writer/include/ckw/Kernel.h
 delete mode 100644 compute_kernel_writer/include/ckw/KernelWriter.h
 delete mode 100644 compute_kernel_writer/include/ckw/OperandBase.h
 delete mode 100644 compute_kernel_writer/include/ckw/ScalarValue.h
 delete mode 100644 compute_kernel_writer/include/ckw/TensorOperand.h
 delete mode 100644 compute_kernel_writer/include/ckw/TensorTileSampler.h
 delete mode 100644 compute_kernel_writer/include/ckw/TileOperand.h
 create mode 100644 compute_kernel_writer/prototype/CMakeLists.txt
 create mode 100644 compute_kernel_writer/prototype/examples/add_exp_store.cpp
 create mode 100644 compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.cpp
 create mode 100644 compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.h
 create mode 100644 compute_kernel_writer/prototype/examples/common/ExampleKernelWriter.cpp
 create mode 100644 compute_kernel_writer/prototype/examples/common/ExampleKernelWriter.h
 create mode 100644 compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.cpp
 create mode 100644 compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.h
 create mode 100644 compute_kernel_writer/prototype/include/ckw/Error.h
 create mode 100644 compute_kernel_writer/prototype/include/ckw/Kernel.h
 create mode 100644 compute_kernel_writer/prototype/include/ckw/KernelWriter.h
 create mode 100644 compute_kernel_writer/prototype/include/ckw/OperandBase.h
 create mode 100644 compute_kernel_writer/prototype/include/ckw/ScalarValue.h
 create mode 100644 compute_kernel_writer/prototype/include/ckw/TensorInfo.h
 create mode 100644 compute_kernel_writer/prototype/include/ckw/TensorOperand.h
 create mode 100644 compute_kernel_writer/prototype/include/ckw/TensorTileSampler.h
 create mode 100644 compute_kernel_writer/prototype/include/ckw/TileInfo.h
 create mode 100644 compute_kernel_writer/prototype/include/ckw/TileOperand.h
 create mode 100644 compute_kernel_writer/prototype/include/ckw/Types.h
 create mode 100644 compute_kernel_writer/prototype/src/Kernel.cpp
 create mode 100644 compute_kernel_writer/prototype/src/KernelWriter.cpp
 create mode 100644 compute_kernel_writer/prototype/src/OperandBase.cpp
 create mode 100644 compute_kernel_writer/prototype/src/Prototype.h
 create mode 100644 compute_kernel_writer/prototype/src/TensorInfo.cpp
 create mode 100644 compute_kernel_writer/prototype/src/TensorOperand.cpp
 create mode 100644 compute_kernel_writer/prototype/src/TensorTileSampler.cpp
 create mode 100644 compute_kernel_writer/prototype/src/TileInfo.cpp
 create mode 100644 compute_kernel_writer/prototype/src/TileOperand.cpp
 delete
mode 100644 compute_kernel_writer/src/Kernel.cpp delete mode 100644 compute_kernel_writer/src/KernelWriter.cpp delete mode 100644 compute_kernel_writer/src/OperandBase.cpp delete mode 100644 compute_kernel_writer/src/Prototype.h delete mode 100644 compute_kernel_writer/src/TensorOperand.cpp delete mode 100644 compute_kernel_writer/src/TensorTileSampler.cpp delete mode 100644 compute_kernel_writer/src/TileOperand.cpp delete mode 100644 compute_kernel_writer/src/acl/AclComponentArgument.cpp delete mode 100644 compute_kernel_writer/src/acl/AclKernelWriter.cpp delete mode 100644 compute_kernel_writer/src/acl/AclScopedKernelWriter.cpp create mode 100644 src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp create mode 100644 src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h create mode 100644 src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.cpp create mode 100644 src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h create mode 100644 src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp create mode 100644 src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h diff --git a/Android.bp b/Android.bp index ccf513a137..fd6ff3b4d3 100644 --- a/Android.bp +++ b/Android.bp @@ -178,8 +178,8 @@ cc_library_static { proprietary: true, local_include_dirs: ["build/android-arm64v8a/src/core", "build/android-arm64v8a/src/core/CL", - "compute_kernel_writer/include", - "compute_kernel_writer", + "compute_kernel_writer/prototype/include", + "compute_kernel_writer/prototype", "src/core/common", "src/core/helpers", "src/core/NEON/kernels/arm_gemm", @@ -189,24 +189,14 @@ cc_library_static { "src/cpu/kernels/assembly"], export_include_dirs: [".", "./include"], srcs: [ - "compute_kernel_writer/src/Error.cpp", - "compute_kernel_writer/src/Helpers.cpp", - "compute_kernel_writer/src/Kernel.cpp", - "compute_kernel_writer/src/KernelWriter.cpp", - "compute_kernel_writer/src/OperandBase.cpp", - "compute_kernel_writer/src/TensorInfo.cpp", - "compute_kernel_writer/src/TensorOperand.cpp", - "compute_kernel_writer/src/TensorTileSampler.cpp", - "compute_kernel_writer/src/TensorUtils.cpp", - "compute_kernel_writer/src/TileInfo.cpp", - "compute_kernel_writer/src/TileOperand.cpp", - "compute_kernel_writer/src/acl/AclComponentArgument.cpp", - "compute_kernel_writer/src/acl/AclKernelWriter.cpp", - "compute_kernel_writer/src/acl/AclScopedKernelWriter.cpp", - "compute_kernel_writer/src/cl/CLConstantTile.cpp", - "compute_kernel_writer/src/cl/CLHelpers.cpp", - "compute_kernel_writer/src/cl/CLTile.cpp", - "compute_kernel_writer/src/cl/ICLTile.cpp", + "compute_kernel_writer/prototype/src/Kernel.cpp", + "compute_kernel_writer/prototype/src/KernelWriter.cpp", + "compute_kernel_writer/prototype/src/OperandBase.cpp", + "compute_kernel_writer/prototype/src/TensorInfo.cpp", + "compute_kernel_writer/prototype/src/TensorOperand.cpp", + "compute_kernel_writer/prototype/src/TensorTileSampler.cpp", + "compute_kernel_writer/prototype/src/TileInfo.cpp", + "compute_kernel_writer/prototype/src/TileOperand.cpp", "src/c/AclContext.cpp", "src/c/AclOperator.cpp", "src/c/AclQueue.cpp", @@ -642,7 +632,10 @@ cc_library_static { "src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.cpp", "src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp", "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp", + "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp", "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp", + 
"src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.cpp", + "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp", "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp", "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp", "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp", diff --git a/SConscript b/SConscript index 9ddffc4273..5504d48e07 100644 --- a/SConscript +++ b/SConscript @@ -128,7 +128,7 @@ def build_library(name, build_env, sources, static=False, libs=[]): cloned_build_env["LINKFLAGS"].remove('-static-libstdc++') if env['experimental_dynamic_fusion']: - libs.append('libckw.a') + libs.append('libckw_prototype.a') if static: obj = cloned_build_env.StaticLibrary(name, source=sources, LIBS = arm_compute_env["LIBS"] + libs) diff --git a/SConstruct b/SConstruct index 4994972172..4ab9fdd66a 100644 --- a/SConstruct +++ b/SConstruct @@ -160,7 +160,7 @@ install_path = env['install_dir'] if not env['install_dir'].startswith('/') and install_path != "": install_path = "%s/%s" % (build_path, install_path) -env.Append(LIBPATH = [build_path]) +env.Append(LIBPATH = [build_path, os.path.join(build_path, "prototype")]) Export('env') Export('vars') @@ -432,7 +432,7 @@ if env['experimental_dynamic_fusion']: CKW_ENABLE_ASSERTS = env['debug'] or env['asserts'] CKW_PROJECT_DIR = Dir('.').path + "/compute_kernel_writer" - CKW_INCLUDE_DIR = CKW_PROJECT_DIR + "/include" + CKW_INCLUDE_DIR = CKW_PROJECT_DIR + "/prototype/include" CKW_BUILD_DIR = build_path.replace("#", "") CKW_CMAKE_CMD = "CC={CKW_CC} CXX={CKW_CXX} cmake -G \"Unix Makefiles\" " \ @@ -440,6 +440,7 @@ if env['experimental_dynamic_fusion']: "-DCMAKE_BUILD_TYPE={CKW_BUILD_TYPE} " \ "-DCKW_ENABLE_OPENCL={CKW_ENABLE_OPENCL} " \ "-DCKW_ENABLE_ASSERTS={CKW_ENABLE_ASSERTS} " \ + "-DCKW_BUILD_PROTOTYPE=ON " \ "-DCKW_CCACHE={CKW_CCACHE} ".format(CKW_CC=CKW_CC, CKW_CXX=CKW_CXX, CKW_PROJECT_DIR=CKW_PROJECT_DIR, diff --git a/compute_kernel_writer/CMakeLists.txt b/compute_kernel_writer/CMakeLists.txt index 2c770e4efb..186ab328f5 100644 --- a/compute_kernel_writer/CMakeLists.txt +++ b/compute_kernel_writer/CMakeLists.txt @@ -43,7 +43,6 @@ message(STATUS "${CMAKE_PROJECT_NAME} ${CMAKE_PROJECT_VERSION}") option(CKW_ENABLE_OPENCL "Enable OpenCL code generation" OFF) option(CKW_ENABLE_ASSERTS "Enable assertions. 
Always enabled in Debug builds" OFF) option(CKW_BUILD_TESTING "Build the Compute Kernel Writer validation test suite" OFF) -option(CKW_BUILD_EXAMPLES "Build the Compute Kernel Writer examples" OFF) option(CKW_CCACHE "Use compiler cache for faster recompilation" OFF) #--------------------------------------------------------------------- @@ -122,17 +121,6 @@ target_sources(ckw PRIVATE src/TensorInfo.cpp src/TensorUtils.cpp src/TileInfo.cpp - - src/Kernel.cpp - src/KernelWriter.cpp - src/OperandBase.cpp - src/TileOperand.cpp - src/TensorOperand.cpp - src/TensorTileSampler.cpp - - src/acl/AclKernelWriter.cpp - src/acl/AclScopedKernelWriter.cpp - src/acl/AclComponentArgument.cpp ) if(CKW_ENABLE_OPENCL) @@ -147,7 +135,7 @@ endif() target_include_directories(ckw PUBLIC ${CMAKE_CURRENT_LIST_DIR}/include PRIVATE ${CMAKE_CURRENT_LIST_DIR} - ) +) #--------------------------------------------------------------------- # Validation tests @@ -172,16 +160,10 @@ if(CKW_BUILD_TESTING) endif() #--------------------------------------------------------------------- -# Examples - -function(add_ckw_example name) - add_executable(${name} ${ARGN}) - target_link_libraries(${name} PUBLIC ckw) -endfunction(add_ckw_example) +# Prototype -if(CKW_BUILD_EXAMPLES) - add_ckw_example(ckw_example_add_exp_store - examples/add_exp_store.cpp) +if(CKW_BUILD_PROTOTYPE) + add_subdirectory(prototype) endif() #--------------------------------------------------------------------- diff --git a/compute_kernel_writer/README.md b/compute_kernel_writer/README.md index 650eae2b48..8a24fe20ec 100644 --- a/compute_kernel_writer/README.md +++ b/compute_kernel_writer/README.md @@ -82,6 +82,5 @@ This project can be configured with the following build options. Enable options | CKW_ENABLE_OPENCL | Enable OpenCL code generation. | | CKW_ENABLE_ASSERTS | Enable assertions. Always enabled for Debug builds. | | CKW_BUILD_TESTING | Build the validation test suite. | -| CKW_BUILD_EXAMPLES | Build the examples. | | CKW_CCACHE | Use compiler cache for faster recompilation. | | CMAKE_TOOLCHAIN_FILE | When cross-compiling, set this variable to the path of the CMake toolchain file. | diff --git a/compute_kernel_writer/examples/add_exp_store.cpp b/compute_kernel_writer/examples/add_exp_store.cpp deleted file mode 100644 index ab2f3e15fb..0000000000 --- a/compute_kernel_writer/examples/add_exp_store.cpp +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "ckw/Error.h" -#include "ckw/KernelWriter.h" -#include "ckw/TensorOperand.h" -#include "ckw/TensorTileSampler.h" -#include "ckw/TileOperand.h" -#include "ckw/Types.h" - -#include "acl/AclComponentArgument.h" -#include "acl/AclKernelWriter.h" -#include "acl/AclScopedKernelWriter.h" - -#include -#include - -using namespace ckw; - -TensorTileSampler create_simple_sampler(AclScopedKernelWriter writer) -{ - TensorTileSampler sampler; - - constexpr int32_t m0 = 4; - constexpr int32_t n0 = 4; - - auto &gid_0 = writer->declare_tile("gid_0", DataType::Int32); - auto &gid_1 = writer->declare_tile("gid_1", DataType::Int32); - auto &gid_2 = writer->declare_tile("gid_2", DataType::Int32); - - auto &const_0 = writer->declare_tile("0", 0); - - writer->op_get_global_id(gid_0, 0); - writer->op_get_global_id(gid_1, 1); - writer->op_get_global_id(gid_2, 2); - - sampler.x(gid_0); - sampler.y(gid_1); - sampler.z(gid_2); - sampler.b(const_0); - - sampler.width(n0); - sampler.height(m0); - - sampler.format(TensorSamplerFormat::C_WH_1); - sampler.address_mode_x(TensorSamplerAddressModeX::None); - sampler.address_mode_y(TensorSamplerAddressModeY::ClampToBorder); - sampler.address_mode_z(TensorSamplerAddressModeZ::Skip); - - return sampler; -} - -void op_binary_elementwise(AclScopedKernelWriter writer, std::vector operands) -{ - auto lhs = operands.at(0); - auto rhs = operands.at(1); - auto dst = operands.at(2); - - // Load the LHS and RHS tile and prepare the tensor sampler. - if(!lhs->has_tile() && !rhs->has_tile()) - { - const auto sampler = create_simple_sampler(writer); - - writer->op_load_once(lhs, sampler); - writer->op_load_once(rhs, sampler); - } - else if(lhs->has_tile()) - { - const auto &sampler = lhs->tile_sampler(); - writer->op_load_once(rhs, sampler); - } - else - { - const auto &sampler = rhs->tile_sampler(); - writer->op_load_once(lhs, sampler); - } - - auto &lhs_tile = lhs->tile(); - auto &rhs_tile = rhs->tile(); - const auto &sampler = lhs->tile_sampler(); - - // Prepare the output tile. - if(!dst->has_tile()) - { - auto &tile = writer->declare_tile("dst_tile", lhs_tile.tile_info()); - dst->init_virtual_tensor(tile, sampler); - } - - auto &dst_tile = dst->tile(); - - // Perform the operation. - writer->op_binary_expression(dst_tile, lhs_tile, rhs_tile, BinaryOp::Add); -} - -void op_exp(AclScopedKernelWriter writer, std::vector operands) -{ - auto src = operands.at(0); - auto dst = operands.at(1); - - // Load the source tile and prepare the sampler. - if(!src->has_tile()) - { - const auto sampler = create_simple_sampler(writer); - writer->op_load_once(src, sampler); - } - - auto &src_tile = src->tile(); - const auto &sampler = src->tile_sampler(); - - // Prepare the output tile. - if(!dst->has_tile()) - { - auto &tile = writer->declare_tile("dst_tile", src_tile.tile_info()); - dst->init_virtual_tensor(tile, sampler); - } - - auto &dst_tile = dst->tile(); - - // Perform the operation. 
- writer->op_scalar_function(dst_tile, src_tile, ScalarUnaryFunction::Exp); -} - -void op_store(AclScopedKernelWriter writer, std::vector operands) -{ - auto src = operands.at(0); - auto dst = operands.at(1); - - auto &src_tile = src->tile(); - const auto &sampler = src->tile_sampler(); - auto &dst_tensor = dst->tensor(); - - writer->op_store(dst_tensor, src_tile, sampler); -} - -int main() -{ - Kernel kernel("example", GpuTargetLanguage::OpenCL); - AclKernelWriter root_writer(kernel); - - AclScopedKernelWriter writer(&root_writer); - - const TensorInfo src0_info(DataType::Fp32, TensorShape({ 3, 10, 20, 1, 1 }), TensorDataLayout::Nhwc, 0); - const TensorInfo src1_info(DataType::Fp32, TensorShape({ 3, 10, 20, 1, 1 }), TensorDataLayout::Nhwc, 1); - const TensorInfo dst_info(DataType::Fp32, TensorShape({ 3, 10, 20, 1, 1 }), TensorDataLayout::Nhwc, 2); - - AclComponentArgument src0(writer->create_tensor_argument("src0", src0_info)); - AclComponentArgument src1(writer->create_tensor_argument("src1", src1_info)); - AclComponentArgument dst(writer->create_tensor_argument("dst", dst_info)); - - AclComponentArgument ans; - - op_binary_elementwise(writer, { &src0, &src1, &ans }); - op_exp(writer, { &ans, &ans }); - op_store(writer, { &ans, &dst }); - - const auto code = root_writer.generate_code(); - std::cout << code; - - return 0; -} diff --git a/compute_kernel_writer/include/acl/AclComponentArgument.h b/compute_kernel_writer/include/acl/AclComponentArgument.h deleted file mode 100644 index 485b7a30bc..0000000000 --- a/compute_kernel_writer/include/acl/AclComponentArgument.h +++ /dev/null @@ -1,111 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef CKW_INCLUDE_ACL_ACLCOMPONENTARGUMENT_H -#define CKW_INCLUDE_ACL_ACLCOMPONENTARGUMENT_H - -#include "ckw/TensorTileSampler.h" - -namespace ckw -{ -class TensorOperand; -class TileOperand; -} // namespace ckw - -/** The argument of a dynamic fusion component which can be either user tensor or virtual tensor. */ -class AclComponentArgument -{ -public: - /** Initialize a new instance of @ref AclComponentArgument class for empty virtual tensor. */ - AclComponentArgument(); - - /** Initialize a new instance of @ref AclComponentArgument class for user tensor. - * - * @param[in] tensor The user tensor. - */ - explicit AclComponentArgument(ckw::TensorOperand &tensor); - - /** Set virtual tensor information (tile, sampler) for the argument. 
- * - * If the component is a user tensor, it can be treated as virtual tensor as well - * and won't be loaded again using @ref AclKernelWriter::op_load_once method. - * - * @param[in] tile The tile that has been loaded. - * @param[in] sampler The tensor sampling information that has been used to load the tile. - */ - AclComponentArgument &init_virtual_tensor(ckw::TileOperand &tile, const ckw::TensorTileSampler &sampler); - - /** Get whether the argument is a user tensor. */ - bool has_tensor() const; - - /** Get the tensor operand. - * - * If the tensor is not available, throw an error. - */ - ckw::TensorOperand &tensor(); - - /** Get the tensor operand. - * - * If the tensor is not available, throw an error. - */ - const ckw::TensorOperand &tensor() const; - - /** Get whether the argument contains a tile. - * - * The argument can be either a user tensor that has been loaded, - * or a virtual tensor (i.e. a tile with tensor sampling information). - */ - bool has_tile() const; - - /** Get the tile operand. - * - * If the tile is not available, throw an error. - */ - ckw::TileOperand &tile(); - - /** Get the tile operand. - * - * If the tile is not available, throw an error. - */ - const ckw::TileOperand &tile() const; - - /** Get the tensor sampling information for the tile. - * - * If the tile is not available, throw an error. - */ - ckw::TensorTileSampler &tile_sampler(); - - /** Get the tensor sampling information for the tile. - * - * If the tile is not available, throw an error. - */ - const ckw::TensorTileSampler &tile_sampler() const; - -private: - ckw::TensorOperand *_tensor{ nullptr }; - ckw::TileOperand *_tile{ nullptr }; - ckw::TensorTileSampler _tile_sampler{}; -}; - -#endif // CKW_INCLUDE_ACL_ACLCOMPONENTARGUMENT_H diff --git a/compute_kernel_writer/include/acl/AclKernelWriter.h b/compute_kernel_writer/include/acl/AclKernelWriter.h deleted file mode 100644 index 27b7add6ef..0000000000 --- a/compute_kernel_writer/include/acl/AclKernelWriter.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef CKW_INCLUDE_ACL_ACLKERNELWRITER_H -#define CKW_INCLUDE_ACL_ACLKERNELWRITER_H - -#include "ckw/KernelWriter.h" -#include "ckw/TensorTileSampler.h" - -class AclComponentArgument; - -namespace ckw -{ -class Kernel; -} // namespace ckw - -/** Extended implementation of kernel writer for dynamic fusion. 
*/ -class AclKernelWriter : public ckw::KernelWriter -{ -public: - /** Initialize a new instance of @ref AclKernelWriter class. - * - * @param[in] kernel The kernel to be generated. - */ - explicit AclKernelWriter(ckw::Kernel &kernel); - - /** Load the user tensor to the tile in the same component argument if it hasn't been loaded. - * - * @param[in] tensor_or_tile The component argument that is either a user tensor or a virtual tensor. - * @param[in] sampler The tensor sampling information to load the tile. - */ - void op_load_once(AclComponentArgument *tensor_or_tile, const ckw::TensorTileSampler &sampler); -}; - -#endif // CKW_INCLUDE_ACL_ACLKERNELWRITER_H diff --git a/compute_kernel_writer/include/acl/AclScopedKernelWriter.h b/compute_kernel_writer/include/acl/AclScopedKernelWriter.h deleted file mode 100644 index 6cb957bfb5..0000000000 --- a/compute_kernel_writer/include/acl/AclScopedKernelWriter.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef CKW_INCLUDE_ACL_ACLSCOPEDKERNELWRITER_H -#define CKW_INCLUDE_ACL_ACLSCOPEDKERNELWRITER_H - -#include - -class AclKernelWriter; - -/** Helper to automatically manage kernel writer ID space. */ -class AclScopedKernelWriter -{ -public: - /** Initialize a new instance of @ref AclScopedKernelWriter class. */ - explicit AclScopedKernelWriter(AclKernelWriter *writer); - - /** Create a new scope from the specified scoped kernel writer. */ - AclScopedKernelWriter(const AclScopedKernelWriter &other); - - /** Assignment is disallowed. */ - AclScopedKernelWriter &operator=(const AclScopedKernelWriter &) = delete; - - /** Access the underlying kernel writer. */ - AclKernelWriter *operator->(); - - /** Access the underlying kernel writer. */ - const AclKernelWriter *operator->() const; - - /** Get the kernel writer. */ - AclKernelWriter *writer(); - - /** Get the kernel writer. */ - const AclKernelWriter *writer() const; - -private: - AclKernelWriter *_writer; - int32_t _parent_id_space; -}; - -#endif // CKW_INCLUDE_ACL_ACLSCOPEDKERNELWRITER_H diff --git a/compute_kernel_writer/include/ckw/Error.h b/compute_kernel_writer/include/ckw/Error.h index 8c4853722b..996893823e 100644 --- a/compute_kernel_writer/include/ckw/Error.h +++ b/compute_kernel_writer/include/ckw/Error.h @@ -21,12 +21,11 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ +#ifndef COMPUTE_KERNEL_WRITER_INCLUDE_CKW_ERROR_H +#define COMPUTE_KERNEL_WRITER_INCLUDE_CKW_ERROR_H -#ifndef CKW_INCLUDE_CKW_ERROR_H -#define CKW_INCLUDE_CKW_ERROR_H - -#include #include +#include namespace ckw { @@ -45,59 +44,16 @@ std::string create_error_msg(const std::string &file, const std::string &func, c * * @param[in] msg Message to display. */ -#define COMPUTE_KERNEL_WRITER_ERROR_ON_MSG(msg) \ - do \ - { \ - const std::string arg0(__FILE__); \ - const std::string arg1(__func__); \ - const std::string arg2(std::to_string(__LINE__)); \ - const std::string arg3(msg); \ +#define COMPUTE_KERNEL_WRITER_ERROR_ON_MSG(msg) \ + do \ + { \ + const std::string arg0(__FILE__); \ + const std::string arg1(__func__); \ + const std::string arg2(std::to_string(__LINE__)); \ + const std::string arg3(msg); \ std::runtime_error(create_error_msg(arg0, arg1, arg2, arg3)); \ } while(false) -/** If the condition is not met, throw an std::runtime_error with the specified message. - * - * @param[in] cond The condition that is expected to be true. - * @param[in] msg The error message when the condition is not met. - */ -#define CKW_ASSERT_MSG(cond, msg) \ - do \ - { \ - if(!(cond)) \ - { \ - throw ::std::runtime_error(msg); \ - } \ - } while(false) - -/** If the condition is not met, throw an std::runtime_error. - * - * @param[in] cond The condition that is expected to be true. - */ -#define CKW_ASSERT(cond) CKW_ASSERT_MSG(cond, #cond) - -/** If the precondition is met but the consequence is not met, throw an std::runtime_error. - * - * @param[in] precond The condition if is met requires the consequence must also be met. - * @param[in] cond The condition that is expected to be true if the precondition is true. - */ -#define CKW_ASSERT_IF(precond, cond) \ - CKW_ASSERT_MSG(!(precond) || ((precond) && (cond)), #precond " |-> " #cond) - -/** Mark the variables as unused. - * - * @param[in] ... Variables which are unused. - */ -#define CKW_UNUSED(...) ::ckw::ignore_unused(__VA_ARGS__) // NOLINT - -/** Mark the variables as unused. - * - * @param[in] ... Variables which are unused. - */ -template -inline void ignore_unused(T &&...) -{ -} - } // namespace ckw -#endif // CKW_INCLUDE_CKW_ERROR_H +#endif /* COMPUTE_KERNEL_WRITER_INCLUDE_CKW_ERROR_H */ diff --git a/compute_kernel_writer/include/ckw/Kernel.h b/compute_kernel_writer/include/ckw/Kernel.h deleted file mode 100644 index cbc7700c22..0000000000 --- a/compute_kernel_writer/include/ckw/Kernel.h +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef CKW_INCLUDE_CKW_KERNEL_H -#define CKW_INCLUDE_CKW_KERNEL_H - -#include "ckw/OperandBase.h" -#include "ckw/Types.h" - -#include -#include -#include - -namespace ckw -{ - -namespace prototype -{ -class GpuKernelWriterDataHolder; -} // namespace prototype - -/** The target for kernel writer to write into. */ -class Kernel -{ -public: - /** Constructor - * - * @param[in] name The name of the kernel function. - * @param[in] language The programming language to write the kernel. - */ - Kernel(const char *name, GpuTargetLanguage language); - - /** Destructor */ - ~Kernel(); - - /** Get the name of the kernel function. */ - const std::string &name() const; - - /** (Internal use only) Get the map from operand name to the operand declared in this kernel. */ - const ::std::map<::std::string, ::std::unique_ptr> &operands() const; - - /** (Internal use only) Get the map from operand name to the operand declared in this kernel. */ - ::std::map<::std::string, ::std::unique_ptr> &operands(); - - /** (Internal use only) Get the implementation data. */ - prototype::GpuKernelWriterDataHolder *impl(); - -private: - ::std::string _name; - ::std::unique_ptr _kernel; - ::std::map<::std::string, ::std::unique_ptr> _operands; -}; - -} // namespace ckw - -#endif // CKW_INCLUDE_CKW_KERNEL_H diff --git a/compute_kernel_writer/include/ckw/KernelWriter.h b/compute_kernel_writer/include/ckw/KernelWriter.h deleted file mode 100644 index 5dce62a14c..0000000000 --- a/compute_kernel_writer/include/ckw/KernelWriter.h +++ /dev/null @@ -1,217 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef CKW_INCLUDE_CKW_KERNELWRITER_H -#define CKW_INCLUDE_CKW_KERNELWRITER_H - -#include "ckw/Kernel.h" -#include "ckw/TensorInfo.h" -#include "ckw/TensorOperand.h" -#include "ckw/TileInfo.h" -#include "ckw/TileOperand.h" - -#include - -namespace ckw -{ - -namespace prototype -{ -struct GpuKernelWriterAttribute; -class IGpuKernelWriter; -} // namespace prototype - -/** Kernel writer. 
*/ -class KernelWriter -{ -public: - // ============================================================================================= - // Constructors and destructor - // ============================================================================================= - - /** Initialize a new instance of kernel writer. - * - * @param[in] kernel The kernel to be written to. - */ - explicit KernelWriter(Kernel &kernel); - - /** Destructor */ - ~KernelWriter(); - - /** No copy constructor. */ - KernelWriter(const KernelWriter &) = delete; - - /** No copy assignment. */ - KernelWriter &operator=(const KernelWriter &) = delete; - - // ============================================================================================= - // Scope management - // ============================================================================================= - - /** Get the current ID space. */ - int32_t id_space() const; - - /** Set the current ID space. */ - KernelWriter &id_space(int32_t id_space); - - /** Switch to and return a new ID space. */ - int32_t next_id_space(); - - // ============================================================================================= - // Tensor and tile declaration - // ============================================================================================= - - /** Define a tensor argument. - * - * @param[in] name The name of the tensor. - * @param[in] info The tensor info. - * - * @return The @ref TensorOperand object. - */ - TensorOperand &create_tensor_argument(const char *name, const TensorInfo &info); - - /** Define a compile-time constant scalar argument. - * - * @param[in] name The name of the tile. - * @param[in] value The value of the tile. - * - * @return The @ref TileOperand object. - */ - TileOperand &create_tile_argument(const char *name, int32_t value); - - /** Declare a new tile. - * - * The name of the tile must be unique in the current ID space. - * - * @param[in] name The name of the tile. - * @param[in] ... The necessary arguments to create a new @ref TileOperand. - * - * @return The @ref TileOperand object. - */ - template - TileOperand &declare_tile(const char *name, TArgs &&...args) - { - const auto var_name = generate_variable_name(name); - auto operand = new TileOperand(var_name, ::std::forward(args)...); - register_operand(operand, true); - - return *operand; - } - - // ============================================================================================= - // Load and store - // ============================================================================================= - - /** Load the data from the tensor memory to the tile using the sampling information. - * - * @param[out] tile The tile to be loaded. - * @param[in] tensor The tensor to be read. - * @param[in] sampler The tensor sampling information. - */ - void op_load(TileOperand &tile, TensorOperand &tensor, const TensorTileSampler &sampler); - - /** Store the tile to the tensor using the specified sampling information. - * - * @param[out] dst The tensor that the tile is written to. - * @param[in] src The tile to be stored. - * @param[in] sampler The tensor sampling information. - */ - void op_store(TensorOperand &tensor, const TileOperand &tile, const TensorTileSampler &sampler); - - // ============================================================================================= - // Data processing - // ============================================================================================= - - /** Write assignment: ` = `. - * - * @param[in] dst The destination tile. 
- * @param[in] src The source tile. - */ - void op_assign(TileOperand &dst, const TileOperand &src); - - /** Write binary expression: ` = `. - * - * @param[in] dst The destination tile. - * @param[in] lhs The LHS operand. - * @param[in] rhs The RHS operand. - * @param[in] op The binary operator. - */ - void op_binary_expression(TileOperand &dst, const TileOperand &lhs, const TileOperand &rhs, BinaryOp op); - - /** Write function applied to scalar value: ` = ()`. - * - * @param[in] dst The destination tile. - * @param[in] src The source tile. - * @param[in] func The function to be applied to the source tile. - */ - void op_scalar_function(TileOperand &dst, const TileOperand &src, ScalarUnaryFunction func); - - // ============================================================================================= - // Misc - // ============================================================================================= - - /** Set `dst` the global ID of dimension `dim`. - * - * @param[in] dst The tile to be written to. - * @param[in] dim The global ID dimension. - */ - void op_get_global_id(TileOperand &dst, int32_t dim); - - // ============================================================================================= - // Code generation - // ============================================================================================= - - /** Generate the source code of the kernel. */ - ::std::string generate_code(); - -private: - /** Generate the full variable name based on the original name and the ID space. - * - * @param[in] name The name of the variable. - * - * @return The full variable name. - */ - ::std::string generate_variable_name(const char *name) const; - - /** Register the operand to the kernel. - * - * The operand is uniquely owned by the kernel afterward. - * - * @param[in] operand The operand to be registered. - * @param[in] declaring Whether the tile declaration is generated. - */ - void register_operand(OperandBase *operand, bool declaring); - -private: - Kernel *_kernel; - ::std::unique_ptr _impl_attr; - ::std::unique_ptr _impl; - - int32_t _id_space{ 0 }; - int32_t _max_id_space{ 0 }; -}; - -} // namespace ckw - -#endif // CKW_INCLUDE_CKW_KERNELWRITER_H diff --git a/compute_kernel_writer/include/ckw/OperandBase.h b/compute_kernel_writer/include/ckw/OperandBase.h deleted file mode 100644 index 0ea5030968..0000000000 --- a/compute_kernel_writer/include/ckw/OperandBase.h +++ /dev/null @@ -1,76 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef CKW_INCLUDE_CKW_OPERANDBASE_H -#define CKW_INCLUDE_CKW_OPERANDBASE_H - -#include "ckw/Types.h" -#include - -namespace ckw -{ -namespace prototype -{ -class IGpuKernelWriter; -class Operand; -} // namespace prototype - -/** The base class for all operands. */ -class OperandBase -{ -public: - /** Constructor - * - * @param[in] name The name of the operand. - */ - explicit OperandBase(const ::std::string &name); - - /** Destructor */ - virtual ~OperandBase(); - - /** (Internal use only) Create the implementation operand. - * - * @param[in] writer The implementation kernel writer. - */ - virtual prototype::Operand create_impl_operand(prototype::IGpuKernelWriter *writer) const = 0; - - /** Get the name of the operand. */ - const ::std::string &name() const; - - /** Set the name of the operand. */ - OperandBase &name(const ::std::string &name); - - /** Get the data type of the operand. */ - virtual DataType data_type() const = 0; - - /** Get whether the operand is compile-time constant. */ - virtual bool is_constant() const = 0; - -private: - ::std::string _name; -}; - -} // namespace ckw - -#endif // CKW_INCLUDE_CKW_OPERANDBASE_H diff --git a/compute_kernel_writer/include/ckw/ScalarValue.h b/compute_kernel_writer/include/ckw/ScalarValue.h deleted file mode 100644 index cf017d435f..0000000000 --- a/compute_kernel_writer/include/ckw/ScalarValue.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef CKW_INCLUDE_CKW_SCALARVALUE_H -#define CKW_INCLUDE_CKW_SCALARVALUE_H - -#include "ckw/Error.h" - -#include - -namespace ckw -{ - -/** The scalar value known at compile-time. */ -class ScalarValue -{ -public: - /** Initialize a new instance of @ref ScalarValue class with integer value 0. */ - ScalarValue() - { - _type = Type::INT; - _value.i64 = 0; - } - - /** Initialize a new instance of @ref ScalarValue class with the specified value. */ - template - ScalarValue(T value) - { - set(value); - } - - /** Set the value. 
*/ - template - void set(T value) - { - CKW_ASSERT(::std::is_integral::value || ::std::is_floating_point::value); - CKW_ASSERT(sizeof(T) <= 8); - - _size = sizeof(T); - - if(::std::is_integral::value) - { - if(::std::is_signed::value) - { - _type = Type::INT; - _value.i64 = value; - } - else - { - _type = Type::UINT; - _value.u64 = value; - } - } - else - { - _type = Type::FLOAT; - _value.f64 = value; - } - } - - /** Get the value. - * - * The caller must make sure that what has been stored in the object must fit - * the output data type without data corruption or loss of accuracy. - */ - template - T get() const - { - CKW_ASSERT(::std::is_integral::value || ::std::is_floating_point::value); - CKW_ASSERT(sizeof(T) >= _size); - - if(::std::is_integral::value) - { - if(::std::is_signed::value) - { - CKW_ASSERT(_type == Type::INT || _type == Type::UINT); - CKW_ASSERT_IF(_type == Type::UINT, sizeof(T) > _size); - - return _value.i64; - } - else - { - CKW_ASSERT(_type == Type::INT); - - return _value.u64; - } - } - else - { - return _value.f64; - } - } - -private: - union Value - { - int64_t i64; - uint64_t u64; - double f64; - }; - - enum class Type : int32_t - { - UINT, - INT, - FLOAT, - }; - - Value _value{}; - Type _type{}; - uint32_t _size{}; -}; - -} // namespace ckw - -#endif // CKW_INCLUDE_CKW_SCALARVALUE_H diff --git a/compute_kernel_writer/include/ckw/TensorOperand.h b/compute_kernel_writer/include/ckw/TensorOperand.h deleted file mode 100644 index 130ab596fb..0000000000 --- a/compute_kernel_writer/include/ckw/TensorOperand.h +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef CKW_INCLUDE_CKW_TENSOROPERAND_H -#define CKW_INCLUDE_CKW_TENSOROPERAND_H - -#include "ckw/OperandBase.h" -#include "ckw/TensorInfo.h" -#include "ckw/TensorTileSampler.h" -#include "ckw/TileOperand.h" -#include "ckw/Types.h" - -#include - -namespace ckw -{ - -class TensorComponentOperand; - -// ================================================================================================= -// TensorOperand -// ================================================================================================= - -/** Tensor operand */ -class TensorOperand : public OperandBase -{ -public: - /** Initialize a new instance of @ref TensorOperand class. - * - * @param[in] name The name of the tensor. - * @param[in] info The tensor info. 
- */ - TensorOperand(const ::std::string &name, const TensorInfo &info); - - /** No copy constructor. */ - TensorOperand(const TensorOperand &other) = delete; - - /** No copy assignment. */ - TensorOperand &operator=(const TensorOperand &other) = delete; - - /** (Internal use only) Create the implementation operand. - * - * @param[in] writer The implementation kernel writer. - */ - virtual prototype::Operand create_impl_operand(prototype::IGpuKernelWriter *writer) const override; - - /** Get the tensor info. */ - const TensorInfo &info() const; - - /** Get the tensor info. */ - TensorInfo &info(); - - /** Get the data type. */ - virtual DataType data_type() const override; - - /** Get whether the tensor is compile-time constant. */ - virtual bool is_constant() const override; - - /** Get the default tile attached to the tensor. */ - const TileOperand &tile() const; - - /** Get the default tile attached to the tensor. */ - TileOperand &tile(); - - /** Set the default tile attached to the tensor. */ - TensorOperand &tile(TileOperand &tile); - - /** Get the tensor sampler of the default tile. */ - const TensorTileSampler &tile_sampler() const; - - /** Get the tensor sampler of the default tile. */ - TensorTileSampler &tile_sampler(); - - /** Set the tensor sampler of the default tile. */ - TensorOperand &tile_sampler(const TensorTileSampler &value); - - /** Get the operand that contains the stride in y dimension of the tensor. */ - TileOperand &stride1(); - - /** Get the operand that contains the stride in z dimension of the tensor. */ - TileOperand &stride2(); - - /** Get the operand that contains the stride in w dimension of the tensor. */ - TileOperand &stride3(); - - /** Get the operand that contains the stride in w dimension of the tensor. */ - TileOperand &stride4(); - - /** Get the operand that contains the size of dimension 0 of the tensor. */ - TileOperand &dim0(); - - /** Get the operand that contains the size of dimension 1 of the tensor. */ - TileOperand &dim1(); - - /** Get the operand that contains the size of dimension 2 of the tensor. */ - TileOperand &dim2(); - - /** Get the operand that contains the size of dimension 3 of the tensor. */ - TileOperand &dim3(); - - /** Get the operand that contains the size of dimension 4 of the tensor. */ - TileOperand &dim4(); - - /** Get the operand that contains the size of dimensions 1 and 2 collapsed. */ - TileOperand &dim1_dim2(); - - /** Get the operand that contains the size of dimensions 1, 2 and 3 collapsed. */ - TileOperand &dim1_dim2_dim3(); - - /** Get the operand that contains the offset in bytes to the first element. 
*/ - TileOperand &offset_first_element_in_bytes(); - -private: - TensorInfo _info; - - TileOperand *_tile{ nullptr }; - TensorTileSampler _tile_sampler{}; - - ::std::unique_ptr _stride1{ nullptr }; - ::std::unique_ptr _stride2{ nullptr }; - ::std::unique_ptr _stride3{ nullptr }; - ::std::unique_ptr _stride4{ nullptr }; - ::std::unique_ptr _dim0{ nullptr }; - ::std::unique_ptr _dim1{ nullptr }; - ::std::unique_ptr _dim2{ nullptr }; - ::std::unique_ptr _dim3{ nullptr }; - ::std::unique_ptr _dim4{ nullptr }; - ::std::unique_ptr _dim1_dim2{ nullptr }; - ::std::unique_ptr _dim1_dim2_dim3{ nullptr }; - ::std::unique_ptr _offset_first_element_in_bytes{ nullptr }; -}; - -// ================================================================================================= -// TensorComponentOperand -// ================================================================================================= - -/** Tile operand that contains tensor information. */ -class TensorComponentOperand : public TileOperand -{ -public: - /** Initialize a new instance of @ref TensorComponentOperand class. - * - * @param[in] name The name of the operand. - * @param[in] component The tensor info component. - */ - TensorComponentOperand(const ::std::string &name, TensorComponent component); - - /** (Internal use only) Create the implementation operand. - * - * @param[in] writer The implementation kernel writer. - */ - virtual prototype::Operand create_impl_operand(prototype::IGpuKernelWriter *writer) const override; - -private: - TensorComponent _component; -}; - -} // namespace ckw - -#endif // CKW_INCLUDE_CKW_TENSOROPERAND_H diff --git a/compute_kernel_writer/include/ckw/TensorTileSampler.h b/compute_kernel_writer/include/ckw/TensorTileSampler.h deleted file mode 100644 index 5ef7bca647..0000000000 --- a/compute_kernel_writer/include/ckw/TensorTileSampler.h +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef CKW_INCLUDE_CKW_TENSORTILESAMPLER_H -#define CKW_INCLUDE_CKW_TENSORTILESAMPLER_H - -#include "ckw/Types.h" -#include - -namespace ckw -{ - -class TileOperand; - -/** Tensor sampler - * - * It contains information about how the result tile should be stored to tensor memory. - * It can also be used to dictate how the subsequent operators fetch the input tensor. - */ -class TensorTileSampler -{ -public: - /** Initialize a new instance of @ref TensorSampler class. 
*/ - TensorTileSampler(); - - /** Initialize a new instance of @ref TensorSampler class. - * - * @param[in] x The coordinate in the x dimension. - * @param[in] y The coordinate in the y dimension. - * @param[in] z The coordinate in the z dimension. - * @param[in] b The coordinate in the batch dimension. - * @param[in] format The tensor data format. - * @param[in] address_mode_x The address mode of the x dimension. - * @param[in] address_mode_y The address mode of the y dimension. - * @param[in] address_mode_z The address mode of the z dimension. - */ - TensorTileSampler( - TileOperand &x, TileOperand &y, TileOperand &z, TileOperand &b, - TensorSamplerFormat format, - TensorSamplerAddressModeX address_mode_x, - TensorSamplerAddressModeY address_mode_y, - TensorSamplerAddressModeZ address_mode_z); - - /** Initialize a new instance of @ref TensorSampler class. - * - * @param[in] x The coordinate in the x dimension. - * @param[in] y The coordinate in the y dimension. - * @param[in] z The coordinate in the z dimension. - * @param[in] b The coordinate in the batch dimension. - * @param[in] height The height of the tile. - * @param[in] width The width of the tile. - * @param[in] format The tensor data format. - * @param[in] address_mode_x The address mode of the x dimension. - * @param[in] address_mode_y The address mode of the y dimension. - * @param[in] address_mode_z The address mode of the z dimension. - */ - TensorTileSampler( - TileOperand &x, TileOperand &y, TileOperand &z, TileOperand &b, - int32_t height, int32_t width, - TensorSamplerFormat format, - TensorSamplerAddressModeX address_mode_x, - TensorSamplerAddressModeY address_mode_y, - TensorSamplerAddressModeZ address_mode_z); - - /** Get the coordinate in the x dimension. */ - const TileOperand &x() const; - - /** Set the coordinate in the x dimension. */ - TensorTileSampler &x(TileOperand &x); - - /** Get the coordinate in the y dimension. */ - const TileOperand &y() const; - - /** Set the coordinate in the y dimension. */ - TensorTileSampler &y(TileOperand &y); - - /** Get the coordinate in the z dimension. */ - const TileOperand &z() const; - - /** Set the coordinate in the z dimension. */ - TensorTileSampler &z(TileOperand &z); - - /** Get the coordinate in the batch dimension. */ - const TileOperand &b() const; - - /** Set the coordinate in the batch dimension. */ - TensorTileSampler &b(TileOperand &b); - - /** Get the width of the tile. */ - int32_t width() const; - - /** Set the width of the tile. */ - TensorTileSampler &width(int32_t width); - - /** Get the height of the tile. */ - int32_t height() const; - - /** Set the height of the tile. */ - TensorTileSampler &height(int32_t height); - - /** Get the format of the tensor. */ - TensorSamplerFormat format() const; - - /** Set the format of the tensor. */ - TensorTileSampler &format(TensorSamplerFormat format); - - /** Get the address mode of the x dimension. */ - TensorSamplerAddressModeX address_mode_x() const; - - /** Set the address mode of the x-dimension. */ - TensorTileSampler &address_mode_x(TensorSamplerAddressModeX address_mode_x); - - /** Get the address mode of the y dimension. */ - TensorSamplerAddressModeY address_mode_y() const; - - /** Set the address mode of the y dimension. */ - TensorTileSampler &address_mode_y(TensorSamplerAddressModeY address_mode_y); - - /** Get the address mode of the z dimension. */ - TensorSamplerAddressModeZ address_mode_z() const; - - /** Set the address mode of the z dimension. 
*/ - TensorTileSampler &address_mode_z(TensorSamplerAddressModeZ address_mode_z); - -private: - TileOperand *_x{ nullptr }; - TileOperand *_y{ nullptr }; - TileOperand *_z{ nullptr }; - TileOperand *_b{ nullptr }; - - int32_t _height{ 0 }; - int32_t _width{ 0 }; - - TensorSamplerFormat _format{ TensorSamplerFormat::Unknown }; - TensorSamplerAddressModeX _address_mode_x{ TensorSamplerAddressModeX::Unknown }; - TensorSamplerAddressModeY _address_mode_y{ TensorSamplerAddressModeY::Unknown }; - TensorSamplerAddressModeZ _address_mode_z{ TensorSamplerAddressModeZ::Unknown }; -}; - -} // namespace ckw - -#endif // CKW_INCLUDE_CKW_TENSORTILESAMPLER_H diff --git a/compute_kernel_writer/include/ckw/TileOperand.h b/compute_kernel_writer/include/ckw/TileOperand.h deleted file mode 100644 index 1eee18589f..0000000000 --- a/compute_kernel_writer/include/ckw/TileOperand.h +++ /dev/null @@ -1,110 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef CKW_INCLUDE_CKW_TILEOPERAND_H -#define CKW_INCLUDE_CKW_TILEOPERAND_H - -#include "ckw/Error.h" -#include "ckw/OperandBase.h" -#include "ckw/ScalarValue.h" -#include "ckw/TileInfo.h" - -#include - -namespace ckw -{ - -class Kernel; - -/** Tile operand which can be either scalar, vector or 2D tile. */ -class TileOperand : public OperandBase -{ -public: - /** Initialize a new instance of @ref TileOperand class with the tile information. - * - * @param[in] name The name of the tile. - * @param[in] tile_info The tile info. - */ - TileOperand(const ::std::string &name, const TileInfo &tile_info); - - /** Initialize a new instance of @ref TileOperand for scalar variable. - * - * @param[in] name The name of the tile. - * @param[in] data_type The data type of the tile. - */ - TileOperand(const ::std::string &name, DataType data_type); - - /** Initialize a new instance of @ref TileOperand for compile-time constant scalar variable. - * - * @param[in] name The name of the tile. - * @param[in] value The value of the tile. - */ - TileOperand(const ::std::string &name, int32_t value); - - /** Initialize a new instance of @ref TileOperand for compile-time constant scalar variable. - * - * @param[in] name The name of the tile. - * @param[in] value The value of the tile. - */ - TileOperand(const ::std::string &name, float value); - - /** Prohibit copy of tile operand. */ - TileOperand(const TileOperand &) = delete; - - /** Prohibit copy of tile operand. 
*/ - TileOperand &operator=(const TileOperand &) = delete; - - /** (Internal use only) Create the implementation operand. - * - * @param[in] writer The implementation kernel writer. - */ - virtual prototype::Operand create_impl_operand(prototype::IGpuKernelWriter *writer) const override; - - /** Get the tile info. */ - const TileInfo &tile_info() const; - - /** Get the data type of the tile. */ - virtual DataType data_type() const override; - - /** Get whether the tile is compile-time constant. */ - virtual bool is_constant() const override; - - /** Get whether the tile is a scalar value. */ - bool is_scalar() const; - - /** Get the scalar value of the tile. - * - * The tile must have the shape of 1, 1 (i.e. scalar). - */ - ScalarValue scalar_value() const; - -private: - TileInfo _info; - ScalarValue _value{}; - bool _constant; -}; - -} // namespace ckw - -#endif // CKW_INCLUDE_CKW_TILEOPERAND_H diff --git a/compute_kernel_writer/include/ckw/Types.h b/compute_kernel_writer/include/ckw/Types.h index 5516718e54..c9f80b65e0 100644 --- a/compute_kernel_writer/include/ckw/Types.h +++ b/compute_kernel_writer/include/ckw/Types.h @@ -21,120 +21,25 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ - -#ifndef CKW_INCLUDE_CKW_TYPES_H -#define CKW_INCLUDE_CKW_TYPES_H - -#include -#include +#ifndef COMPUTE_KERNEL_WRITER_INCLUDE_CKW_TYPES_H +#define COMPUTE_KERNEL_WRITER_INCLUDE_CKW_TYPES_H namespace ckw { - /** Compute Kernel Writer data types. This data type is used by the code variables and tensor arguments. */ enum class DataType -{ - Unknown = 0x00, - Fp32 = 0x11, - Fp16 = 0x12, - Int32 = 0x21, - Int16 = 0x22, - Int8 = 0x24, - Uint32 = 0x31, - Uint16 = 0x32, - Uint8 = 0x34, - Bool = 0x41 -}; - -enum class GpuTargetLanguage { Unknown, - OpenCL -}; - -/* Binary operations -*/ -enum class BinaryOp : int32_t -{ - // Elementwise - Add = 0x0000, // + - Sub = 0x0001, // - - Mul = 0x0002, // * - Div = 0x0003, // / - Mod = 0x0004, // % - // Relational - Equal = 0x1000, // == - Less = 0x1001, // < - LessEqual = 0x1002, // <= - Greater = 0x1003, // > - GreaterEqual = 0x1004, // >= - // Algebra - MatMul_Nt_Nt = 0x2000, // X - MatMul_Nt_T = 0x2001, // X - MatMul_T_Nt = 0x2002, // X - MatMul_T_T = 0x2003, // X - Dot = 0x2004, // . - // Logical - LogicalAnd = 0x3000, // && - LogicalOr = 0x3001, // || - LogicalNot = 0x3002 // ! -}; - -enum class AssignmentOp : int32_t -{ - // Unary - Increment = 0x0000, // += - Decrement = 0x0001, // -= -}; - -enum class ScalarUnaryFunction : int32_t -{ - Exp, -}; - -enum class TensorSamplerFormat : int32_t -{ - Unknown = 0, - C_WH_1 = 1, - C_W_H = 2 -}; - -enum class TensorSamplerAddressModeX : int32_t -{ - Unknown = 0, - None = 1, // The user guarantees that the X coordinate is always in-bound - OverlappingMin = 2 // (FIXED shapes only) Reduce the load/store length when x == 0 (MIN). The load length will be width % original length - // Leftover elements can be handled using overlapping. This involves processing some of the elements in the array twice. + Fp32, + Fp16, + Int32, + Int16, + Int8, + Uint32, + Uint16, + Uint8, + Bool }; - -enum class TensorSamplerAddressModeY : int32_t -{ - Unknown = 0, - None = 1, // The user guarantees that the Y coordinate is always in-bound - OverlappingMin = 2, // (FIXED shapes only) Reduce the load/store length when x == 0 (MIN). The load length will be width % original length - Skip = 3, // Skip the read/write - SkipMinEdgeOnly = 4, // Skip greater than or equal to max only. 
The user guarantees that the Y coordinate is always >= 0 - SkipMaxEdgeOnly = 5, // Skip less than 0 only - ClampToNearest = 6, // Clamp the coordinate to nearest edge (0 or max value allowed on Y) - ClampToMinEdgeOnly = 7, // Clamp the negative coordinate to 0 only. Therefore, we expect Y to be always < MAX - ClampToMaxEdgeOnly = 8, // Clamp the coordinate to the max value allowed on Y only. We expect Y to be always >= 0 - ClampToBorder = 9, // Clamp to border which always has 0 value - ClampToBorderMinEdgeOnly = 10, - ClampToBorderMaxEdgeOnly = 11 -}; - -enum class TensorSamplerAddressModeZ : int32_t -{ - Unknown = 0, - None = 1, // The user guarantees that the Y coordinate is always in-bound - Skip = 3, // Skip the read/write - SkipMinEdgeOnly = 4, // Skip greater than or equal to max only. The user guarantees that the Y coordinate is always >= 0 - SkipMaxEdgeOnly = 5, // Skip less than 0 only - ClampToNearest = 6, // Clamp the coordinate to nearest edge (0 or max value allowed on Y) - ClampToMinEdgeOnly = 7, // Clamp the negative coordinate to 0 only. Therefore, we expect Y to be always < MAX - ClampToMaxEdgeOnly = 8, // Clamp the coordinate to the max value allowed on Y only. We expect Y to be always >= 0 -}; - } // namespace ckw -#endif // CKW_INCLUDE_CKW_TYPES_H +#endif /* COMPUTE_KERNEL_WRITER_INCLUDE_CKW_TYPES_H */ diff --git a/compute_kernel_writer/prototype/CMakeLists.txt b/compute_kernel_writer/prototype/CMakeLists.txt new file mode 100644 index 0000000000..84436a95d6 --- /dev/null +++ b/compute_kernel_writer/prototype/CMakeLists.txt @@ -0,0 +1,72 @@ +# Copyright (c) 2023 Arm Limited. +# +# SPDX-License-Identifier: MIT +# +# Permission is hereby granted, free of charge, to any person obtaining a copy +# of this software and associated documentation files (the "Software"), to +# deal in the Software without restriction, including without limitation the +# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +# sell copies of the Software, and to permit persons to whom the Software is +# furnished to do so, subject to the following conditions: +# +# The above copyright notice and this permission notice shall be included in all +# copies or substantial portions of the Software. +# +# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +# SOFTWARE. 
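+
+# This CMakeLists builds the prototype CKW library (ckw_prototype), the common example
+# helpers library (ckw_prototype_examples_common) and the add_exp_store example executable.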
+ +cmake_minimum_required(VERSION 3.14 FATAL_ERROR) + +#--------------------------------------------------------------------- +# Prototype + +add_library(ckw_prototype + src/TileInfo.cpp + src/TensorInfo.cpp + src/Kernel.cpp + src/KernelWriter.cpp + src/OperandBase.cpp + src/TileOperand.cpp + src/TensorOperand.cpp + src/TensorTileSampler.cpp +) + +target_compile_options(ckw_prototype + PUBLIC + ${CKW_CXX_FLAGS} + "$<$:${GNU_WARNINGS}>" + "$<$:${CKW_ASSERTS_OPTS}>" + "$<$:${CKW_ASSERTS_OPTS}>" + ${CMAKE_CXX_FLAGS} +) + +target_compile_definitions(ckw_prototype PUBLIC + $<$:COMPUTE_KERNEL_WRITER_DEBUG_ENABLED> + $<$:COMPUTE_KERNEL_WRITER_ASSERTS_ENABLED> + $<$:COMPUTE_KERNEL_WRITER_ASSERTS_ENABLED> + $<$:COMPUTE_KERNEL_WRITER_OPENCL_ENABLED> +) + +target_include_directories(ckw_prototype + PUBLIC ${CMAKE_CURRENT_LIST_DIR}/include + PRIVATE ${CMAKE_CURRENT_LIST_DIR} +) + +#--------------------------------------------------------------------- +# Examples + +add_library(ckw_prototype_examples_common + examples/common/ExampleKernelWriter.cpp + examples/common/ExampleScopedKernelWriter.cpp + examples/common/ExampleComponentArgument.cpp +) + +target_link_libraries(ckw_prototype_examples_common PUBLIC ckw_prototype) + +add_executable(ckw_prototype_examples_add_exp_store examples/add_exp_store.cpp) +target_link_libraries(ckw_prototype_examples_add_exp_store PUBLIC ckw_prototype_examples_common) diff --git a/compute_kernel_writer/prototype/examples/add_exp_store.cpp b/compute_kernel_writer/prototype/examples/add_exp_store.cpp new file mode 100644 index 0000000000..9ee21957f1 --- /dev/null +++ b/compute_kernel_writer/prototype/examples/add_exp_store.cpp @@ -0,0 +1,181 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "ckw/Error.h" +#include "ckw/KernelWriter.h" +#include "ckw/TensorOperand.h" +#include "ckw/TensorTileSampler.h" +#include "ckw/TileOperand.h" +#include "ckw/Types.h" + +#include "common/ExampleComponentArgument.h" +#include "common/ExampleKernelWriter.h" +#include "common/ExampleScopedKernelWriter.h" + +#include +#include + +using namespace ckw; + +TensorTileSampler create_simple_sampler(ExampleScopedKernelWriter writer) +{ + TensorTileSampler sampler; + + constexpr int32_t m0 = 4; + constexpr int32_t n0 = 4; + + auto &gid_0 = writer->declare_tile("gid_0", DataType::Int32); + auto &gid_1 = writer->declare_tile("gid_1", DataType::Int32); + auto &gid_2 = writer->declare_tile("gid_2", DataType::Int32); + + auto &const_0 = writer->declare_tile("0", 0); + + writer->op_get_global_id(gid_0, 0); + writer->op_get_global_id(gid_1, 1); + writer->op_get_global_id(gid_2, 2); + + sampler.x(gid_0); + sampler.y(gid_1); + sampler.z(const_0); + sampler.b(gid_2); + + sampler.width(n0); + sampler.height(m0); + + sampler.format(TensorSamplerFormat::C_WH_1); + sampler.address_mode_x(TensorSamplerAddressModeX::None); + sampler.address_mode_y(TensorSamplerAddressModeY::ClampToBorder); + sampler.address_mode_z(TensorSamplerAddressModeZ::Skip); + + return sampler; +} + +void op_binary_elementwise(ExampleScopedKernelWriter writer, std::vector operands) +{ + auto lhs = operands.at(0); + auto rhs = operands.at(1); + auto dst = operands.at(2); + + // Load the LHS and RHS tile and prepare the tensor sampler. + if(!lhs->has_tile() && !rhs->has_tile()) + { + const auto sampler = create_simple_sampler(writer); + + writer->op_load_once(lhs, sampler); + writer->op_load_once(rhs, sampler); + } + else if(lhs->has_tile()) + { + const auto &sampler = lhs->tile_sampler(); + writer->op_load_once(rhs, sampler); + } + else + { + const auto &sampler = rhs->tile_sampler(); + writer->op_load_once(lhs, sampler); + } + + auto &lhs_tile = lhs->tile(); + auto &rhs_tile = rhs->tile(); + const auto &sampler = lhs->tile_sampler(); + + // Prepare the output tile. + if(!dst->has_tile()) + { + auto &tile = writer->declare_tile("dst_tile", lhs_tile.tile_info()); + dst->init_virtual_tensor(tile, sampler); + } + + auto &dst_tile = dst->tile(); + + // Perform the operation. + writer->op_binary_expression(dst_tile, lhs_tile, rhs_tile, BinaryOp::Add); +} + +void op_exp(ExampleScopedKernelWriter writer, std::vector operands) +{ + auto src = operands.at(0); + auto dst = operands.at(1); + + // Load the source tile and prepare the sampler. + if(!src->has_tile()) + { + const auto sampler = create_simple_sampler(writer); + writer->op_load_once(src, sampler); + } + + auto &src_tile = src->tile(); + const auto &sampler = src->tile_sampler(); + + // Prepare the output tile. + if(!dst->has_tile()) + { + auto &tile = writer->declare_tile("dst_tile", src_tile.tile_info()); + dst->init_virtual_tensor(tile, sampler); + } + + auto &dst_tile = dst->tile(); + + // Perform the operation. 
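+    // ScalarUnaryFunction::Exp applies the exponential element-wise, i.e. dst_tile[i] = exp(src_tile[i]).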
+ writer->op_scalar_function(dst_tile, src_tile, ScalarUnaryFunction::Exp); +} + +void op_store(ExampleScopedKernelWriter writer, std::vector operands) +{ + auto src = operands.at(0); + auto dst = operands.at(1); + + auto &src_tile = src->tile(); + const auto &sampler = src->tile_sampler(); + auto &dst_tensor = dst->tensor(); + + writer->op_store(dst_tensor, src_tile, sampler); +} + +int main() +{ + Kernel kernel("example", GpuTargetLanguage::OpenCL); + ExampleKernelWriter root_writer(kernel); + + ExampleScopedKernelWriter writer(&root_writer); + + const TensorInfo src0_info(DataType::Fp32, TensorShape({ 3, 10, 20, 1, 1 }), TensorDataLayout::Nhwc, 0); + const TensorInfo src1_info(DataType::Fp32, TensorShape({ 3, 10, 20, 1, 1 }), TensorDataLayout::Nhwc, 1); + const TensorInfo dst_info(DataType::Fp32, TensorShape({ 3, 10, 20, 1, 1 }), TensorDataLayout::Nhwc, 2); + + ExampleComponentArgument src0(writer->create_tensor_argument("src0", src0_info)); + ExampleComponentArgument src1(writer->create_tensor_argument("src1", src1_info)); + ExampleComponentArgument dst(writer->create_tensor_argument("dst", dst_info)); + + ExampleComponentArgument ans; + + op_binary_elementwise(writer, { &src0, &src1, &ans }); + op_exp(writer, { &ans, &ans }); + op_store(writer, { &ans, &dst }); + + const auto code = root_writer.generate_code(); + std::cout << code; + + return 0; +} diff --git a/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.cpp b/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.cpp new file mode 100644 index 0000000000..da5156ca8c --- /dev/null +++ b/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.cpp @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "ExampleComponentArgument.h" +#include "ckw/Error.h" + +ExampleComponentArgument::ExampleComponentArgument() +{ +} + +ExampleComponentArgument::ExampleComponentArgument(ckw::TensorOperand &tensor) + : _tensor(&tensor) +{ +} + +ExampleComponentArgument &ExampleComponentArgument::init_virtual_tensor(ckw::TileOperand &tile, const ckw::TensorTileSampler &tile_sampler) +{ + CKW_ASSERT(_tile == nullptr); + + _tile = &tile; + _tile_sampler = tile_sampler; + + return *this; +} + +bool ExampleComponentArgument::has_tensor() const +{ + return _tensor != nullptr; +} + +ckw::TensorOperand &ExampleComponentArgument::tensor() +{ + CKW_ASSERT(_tensor != nullptr); + + return *_tensor; +} + +const ckw::TensorOperand &ExampleComponentArgument::tensor() const +{ + CKW_ASSERT(_tensor != nullptr); + + return *_tensor; +} + +bool ExampleComponentArgument::has_tile() const +{ + return _tile != nullptr; +} + +ckw::TileOperand &ExampleComponentArgument::tile() +{ + CKW_ASSERT(_tile != nullptr); + + return *_tile; +} + +const ckw::TileOperand &ExampleComponentArgument::tile() const +{ + CKW_ASSERT(_tile != nullptr); + + return *_tile; +} + +ckw::TensorTileSampler &ExampleComponentArgument::tile_sampler() +{ + CKW_ASSERT(_tile != nullptr); + + return _tile_sampler; +} + +const ckw::TensorTileSampler &ExampleComponentArgument::tile_sampler() const +{ + CKW_ASSERT(_tile != nullptr); + + return _tile_sampler; +} diff --git a/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.h b/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.h new file mode 100644 index 0000000000..2de9042691 --- /dev/null +++ b/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.h @@ -0,0 +1,111 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef CKW_PROTOTYPE_EXAMPLES_COMMON_EXAMPLECOMPONENTARGUMENT_H +#define CKW_PROTOTYPE_EXAMPLES_COMMON_EXAMPLECOMPONENTARGUMENT_H + +#include "ckw/TensorTileSampler.h" + +namespace ckw +{ +class TensorOperand; +class TileOperand; +} // namespace ckw + +/** The argument of a dynamic fusion component which can be either user tensor or virtual tensor. */ +class ExampleComponentArgument +{ +public: + /** Initialize a new instance of @ref ExampleComponentArgument class for empty virtual tensor. */ + ExampleComponentArgument(); + + /** Initialize a new instance of @ref ExampleComponentArgument class for user tensor. 
+ * + * @param[in] tensor The user tensor. + */ + explicit ExampleComponentArgument(ckw::TensorOperand &tensor); + + /** Set virtual tensor information (tile, sampler) for the argument. + * + * If the component is a user tensor, it can be treated as virtual tensor as well + * and won't be loaded again using @ref ExampleKernelWriter::op_load_once method. + * + * @param[in] tile The tile that has been loaded. + * @param[in] sampler The tensor sampling information that has been used to load the tile. + */ + ExampleComponentArgument &init_virtual_tensor(ckw::TileOperand &tile, const ckw::TensorTileSampler &sampler); + + /** Get whether the argument is a user tensor. */ + bool has_tensor() const; + + /** Get the tensor operand. + * + * If the tensor is not available, throw an error. + */ + ckw::TensorOperand &tensor(); + + /** Get the tensor operand. + * + * If the tensor is not available, throw an error. + */ + const ckw::TensorOperand &tensor() const; + + /** Get whether the argument contains a tile. + * + * The argument can be either a user tensor that has been loaded, + * or a virtual tensor (i.e. a tile with tensor sampling information). + */ + bool has_tile() const; + + /** Get the tile operand. + * + * If the tile is not available, throw an error. + */ + ckw::TileOperand &tile(); + + /** Get the tile operand. + * + * If the tile is not available, throw an error. + */ + const ckw::TileOperand &tile() const; + + /** Get the tensor sampling information for the tile. + * + * If the tile is not available, throw an error. + */ + ckw::TensorTileSampler &tile_sampler(); + + /** Get the tensor sampling information for the tile. + * + * If the tile is not available, throw an error. + */ + const ckw::TensorTileSampler &tile_sampler() const; + +private: + ckw::TensorOperand *_tensor{ nullptr }; + ckw::TileOperand *_tile{ nullptr }; + ckw::TensorTileSampler _tile_sampler{}; +}; + +#endif // CKW_PROTOTYPE_EXAMPLES_COMMON_EXAMPLECOMPONENTARGUMENT_H diff --git a/compute_kernel_writer/prototype/examples/common/ExampleKernelWriter.cpp b/compute_kernel_writer/prototype/examples/common/ExampleKernelWriter.cpp new file mode 100644 index 0000000000..2c11ae36e5 --- /dev/null +++ b/compute_kernel_writer/prototype/examples/common/ExampleKernelWriter.cpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "ExampleKernelWriter.h" +#include "ExampleComponentArgument.h" +#include "ckw/Error.h" +#include "ckw/TileInfo.h" + +ExampleKernelWriter::ExampleKernelWriter(ckw::Kernel &kernel) + : KernelWriter(kernel) +{ +} + +void ExampleKernelWriter::op_load_once(ExampleComponentArgument *tensor_or_tile, const ckw::TensorTileSampler &sampler) +{ + if(!tensor_or_tile->has_tile()) + { + CKW_ASSERT(tensor_or_tile->has_tensor()); + + auto &tensor = tensor_or_tile->tensor(); + + const auto tile_name = tensor.name() + "_tile"; + auto &tile = declare_tile(tile_name.c_str(), ckw::TileInfo(tensor.data_type(), sampler.height(), sampler.width())); + + op_load(tile, tensor, sampler); + + tensor_or_tile->init_virtual_tensor(tile, sampler); + } +} diff --git a/compute_kernel_writer/prototype/examples/common/ExampleKernelWriter.h b/compute_kernel_writer/prototype/examples/common/ExampleKernelWriter.h new file mode 100644 index 0000000000..1528c3d933 --- /dev/null +++ b/compute_kernel_writer/prototype/examples/common/ExampleKernelWriter.h @@ -0,0 +1,56 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef CKW_PROTOTYPE_EXAMPLES_COMMON_EXAMPLEKERNELWRITER_H +#define CKW_PROTOTYPE_EXAMPLES_COMMON_EXAMPLEKERNELWRITER_H + +#include "ckw/KernelWriter.h" +#include "ckw/TensorTileSampler.h" + +class ExampleComponentArgument; + +namespace ckw +{ +class Kernel; +} // namespace ckw + +/** Extended implementation of kernel writer for dynamic fusion. */ +class ExampleKernelWriter : public ckw::KernelWriter +{ +public: + /** Initialize a new instance of @ref ExampleKernelWriter class. + * + * @param[in] kernel The kernel to be generated. + */ + explicit ExampleKernelWriter(ckw::Kernel &kernel); + + /** Load the user tensor to the tile in the same component argument if it hasn't been loaded. + * + * @param[in] tensor_or_tile The component argument that is either a user tensor or a virtual tensor. + * @param[in] sampler The tensor sampling information to load the tile. 
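+     *
+     * If the argument already holds a tile (i.e. it is a virtual tensor), this method does nothing.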
+ */ + void op_load_once(ExampleComponentArgument *tensor_or_tile, const ckw::TensorTileSampler &sampler); +}; + +#endif // CKW_PROTOTYPE_EXAMPLES_COMMON_EXAMPLEKERNELWRITER_H diff --git a/compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.cpp b/compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.cpp new file mode 100644 index 0000000000..7c44fa8749 --- /dev/null +++ b/compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.cpp @@ -0,0 +1,58 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ExampleScopedKernelWriter.h" +#include "ExampleKernelWriter.h" + +ExampleScopedKernelWriter::ExampleScopedKernelWriter(ExampleKernelWriter *writer) + : _writer(writer), _parent_id_space(writer->id_space()) +{ + _writer->next_id_space(); +} + +ExampleScopedKernelWriter::ExampleScopedKernelWriter(const ExampleScopedKernelWriter &other) + : _writer(other._writer), _parent_id_space(other._writer->id_space()) +{ + _writer->next_id_space(); +} + +ExampleKernelWriter *ExampleScopedKernelWriter::operator->() +{ + return _writer; +} + +const ExampleKernelWriter *ExampleScopedKernelWriter::operator->() const +{ + return _writer; +} + +ExampleKernelWriter *ExampleScopedKernelWriter::writer() +{ + return _writer; +} + +const ExampleKernelWriter *ExampleScopedKernelWriter::writer() const +{ + return _writer; +} diff --git a/compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.h b/compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.h new file mode 100644 index 0000000000..1aa0242c51 --- /dev/null +++ b/compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.h @@ -0,0 +1,62 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef CKW_PROTOTYPE_EXAMPLES_COMMON_EXAMPLESCOPEDKERNELWRITER_H +#define CKW_PROTOTYPE_EXAMPLES_COMMON_EXAMPLESCOPEDKERNELWRITER_H + +#include + +class ExampleKernelWriter; + +/** Helper to automatically manage kernel writer ID space. */ +class ExampleScopedKernelWriter +{ +public: + /** Initialize a new instance of @ref ExampleScopedKernelWriter class. */ + explicit ExampleScopedKernelWriter(ExampleKernelWriter *writer); + + /** Create a new scope from the specified scoped kernel writer. */ + ExampleScopedKernelWriter(const ExampleScopedKernelWriter &other); + + /** Assignment is disallowed. */ + ExampleScopedKernelWriter &operator=(const ExampleScopedKernelWriter &) = delete; + + /** Access the underlying kernel writer. */ + ExampleKernelWriter *operator->(); + + /** Access the underlying kernel writer. */ + const ExampleKernelWriter *operator->() const; + + /** Get the kernel writer. */ + ExampleKernelWriter *writer(); + + /** Get the kernel writer. */ + const ExampleKernelWriter *writer() const; + +private: + ExampleKernelWriter *_writer; + int32_t _parent_id_space; +}; + +#endif // CKW_PROTOTYPE_EXAMPLES_COMMON_EXAMPLESCOPEDKERNELWRITER_H diff --git a/compute_kernel_writer/prototype/include/ckw/Error.h b/compute_kernel_writer/prototype/include/ckw/Error.h new file mode 100644 index 0000000000..b18944eac5 --- /dev/null +++ b/compute_kernel_writer/prototype/include/ckw/Error.h @@ -0,0 +1,79 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef CKW_PROTOTYPE_INCLUDE_CKW_ERROR_H +#define CKW_PROTOTYPE_INCLUDE_CKW_ERROR_H + +#include +#include + +namespace ckw +{ + +/** If the condition is not met, throw an std::runtime_error with the specified message. + * + * @param[in] cond The condition that is expected to be true. + * @param[in] msg The error message when the condition is not met. 
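+ *
+ * Illustrative usage (identifiers are placeholders): CKW_ASSERT_MSG(tile_height > 0, "Tile height must be positive");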
+ */ +#define CKW_ASSERT_MSG(cond, msg) \ + do \ + { \ + if(!(cond)) \ + { \ + throw ::std::runtime_error(msg); \ + } \ + } while(false) + +/** If the condition is not met, throw an std::runtime_error. + * + * @param[in] cond The condition that is expected to be true. + */ +#define CKW_ASSERT(cond) CKW_ASSERT_MSG(cond, #cond) + +/** If the precondition is met but the consequence is not met, throw an std::runtime_error. + * + * @param[in] precond The condition if is met requires the consequence must also be met. + * @param[in] cond The condition that is expected to be true if the precondition is true. + */ +#define CKW_ASSERT_IF(precond, cond) \ + CKW_ASSERT_MSG(!(precond) || ((precond) && (cond)), #precond " |-> " #cond) + +/** Mark the variables as unused. + * + * @param[in] ... Variables which are unused. + */ +#define CKW_UNUSED(...) ::ckw::ignore_unused(__VA_ARGS__) // NOLINT + +/** Mark the variables as unused. + * + * @param[in] ... Variables which are unused. + */ +template +inline void ignore_unused(T &&...) +{ +} + +} // namespace ckw + +#endif // CKW_INCLUDE_CKW_ERROR_H diff --git a/compute_kernel_writer/prototype/include/ckw/Kernel.h b/compute_kernel_writer/prototype/include/ckw/Kernel.h new file mode 100644 index 0000000000..57a8a40341 --- /dev/null +++ b/compute_kernel_writer/prototype/include/ckw/Kernel.h @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef CKW_PROTOTYPE_INCLUDE_CKW_KERNEL_H +#define CKW_PROTOTYPE_INCLUDE_CKW_KERNEL_H + +#include "ckw/OperandBase.h" +#include "ckw/Types.h" + +#include +#include +#include + +namespace ckw +{ + +namespace prototype +{ +class GpuKernelWriterDataHolder; +} // namespace prototype + +/** The target for kernel writer to write into. */ +class Kernel +{ +public: + /** Constructor + * + * @param[in] name The name of the kernel function. + * @param[in] language The programming language to write the kernel. + */ + Kernel(const char *name, GpuTargetLanguage language); + + /** Destructor */ + ~Kernel(); + + /** Get the name of the kernel function. */ + const std::string &name() const; + + /** (Internal use only) Get the map from operand name to the operand declared in this kernel. */ + const ::std::map<::std::string, ::std::unique_ptr> &operands() const; + + /** (Internal use only) Get the map from operand name to the operand declared in this kernel. 
*/ + ::std::map<::std::string, ::std::unique_ptr> &operands(); + + /** (Internal use only) Get the implementation data. */ + prototype::GpuKernelWriterDataHolder *impl(); + +private: + ::std::string _name; + ::std::unique_ptr _kernel; + ::std::map<::std::string, ::std::unique_ptr> _operands; +}; + +} // namespace ckw + +#endif // CKW_PROTOTYPE_INCLUDE_CKW_KERNEL_H diff --git a/compute_kernel_writer/prototype/include/ckw/KernelWriter.h b/compute_kernel_writer/prototype/include/ckw/KernelWriter.h new file mode 100644 index 0000000000..a2778a9485 --- /dev/null +++ b/compute_kernel_writer/prototype/include/ckw/KernelWriter.h @@ -0,0 +1,217 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef CKW_PROTOTYPE_INCLUDE_CKW_KERNELWRITER_H +#define CKW_PROTOTYPE_INCLUDE_CKW_KERNELWRITER_H + +#include "ckw/Kernel.h" +#include "ckw/TensorInfo.h" +#include "ckw/TensorOperand.h" +#include "ckw/TileInfo.h" +#include "ckw/TileOperand.h" + +#include + +namespace ckw +{ + +namespace prototype +{ +struct GpuKernelWriterAttribute; +class IGpuKernelWriter; +} // namespace prototype + +/** Kernel writer. */ +class KernelWriter +{ +public: + // ============================================================================================= + // Constructors and destructor + // ============================================================================================= + + /** Initialize a new instance of kernel writer. + * + * @param[in] kernel The kernel to be written to. + */ + explicit KernelWriter(Kernel &kernel); + + /** Destructor */ + ~KernelWriter(); + + /** No copy constructor. */ + KernelWriter(const KernelWriter &) = delete; + + /** No copy assignment. */ + KernelWriter &operator=(const KernelWriter &) = delete; + + // ============================================================================================= + // Scope management + // ============================================================================================= + + /** Get the current ID space. */ + int32_t id_space() const; + + /** Set the current ID space. */ + KernelWriter &id_space(int32_t id_space); + + /** Switch to and return a new ID space. 
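+     *
+     * Each ID space produces a distinct set of generated variable names, so tiles declared
+     * by different components in the same kernel do not clash.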
*/ + int32_t next_id_space(); + + // ============================================================================================= + // Tensor and tile declaration + // ============================================================================================= + + /** Define a tensor argument. + * + * @param[in] name The name of the tensor. + * @param[in] info The tensor info. + * + * @return The @ref TensorOperand object. + */ + TensorOperand &create_tensor_argument(const char *name, const TensorInfo &info); + + /** Define a compile-time constant scalar argument. + * + * @param[in] name The name of the tile. + * @param[in] value The value of the tile. + * + * @return The @ref TileOperand object. + */ + TileOperand &create_tile_argument(const char *name, int32_t value); + + /** Declare a new tile. + * + * The name of the tile must be unique in the current ID space. + * + * @param[in] name The name of the tile. + * @param[in] ... The necessary arguments to create a new @ref TileOperand. + * + * @return The @ref TileOperand object. + */ + template + TileOperand &declare_tile(const char *name, TArgs &&...args) + { + const auto var_name = generate_variable_name(name); + auto operand = new TileOperand(var_name, ::std::forward(args)...); + register_operand(operand, true); + + return *operand; + } + + // ============================================================================================= + // Load and store + // ============================================================================================= + + /** Load the data from the tensor memory to the tile using the sampling information. + * + * @param[out] tile The tile to be loaded. + * @param[in] tensor The tensor to be read. + * @param[in] sampler The tensor sampling information. + */ + void op_load(TileOperand &tile, TensorOperand &tensor, const TensorTileSampler &sampler); + + /** Store the tile to the tensor using the specified sampling information. + * + * @param[out] dst The tensor that the tile is written to. + * @param[in] src The tile to be stored. + * @param[in] sampler The tensor sampling information. + */ + void op_store(TensorOperand &tensor, const TileOperand &tile, const TensorTileSampler &sampler); + + // ============================================================================================= + // Data processing + // ============================================================================================= + + /** Write assignment: ` = `. + * + * @param[in] dst The destination tile. + * @param[in] src The source tile. + */ + void op_assign(TileOperand &dst, const TileOperand &src); + + /** Write binary expression: ` = `. + * + * @param[in] dst The destination tile. + * @param[in] lhs The LHS operand. + * @param[in] rhs The RHS operand. + * @param[in] op The binary operator. + */ + void op_binary_expression(TileOperand &dst, const TileOperand &lhs, const TileOperand &rhs, BinaryOp op); + + /** Write function applied to scalar value: ` = ()`. + * + * @param[in] dst The destination tile. + * @param[in] src The source tile. + * @param[in] func The function to be applied to the source tile. + */ + void op_scalar_function(TileOperand &dst, const TileOperand &src, ScalarUnaryFunction func); + + // ============================================================================================= + // Misc + // ============================================================================================= + + /** Set `dst` the global ID of dimension `dim`. + * + * @param[in] dst The tile to be written to. 
+ * @param[in] dim The global ID dimension. + */ + void op_get_global_id(TileOperand &dst, int32_t dim); + + // ============================================================================================= + // Code generation + // ============================================================================================= + + /** Generate the source code of the kernel. */ + ::std::string generate_code(); + +private: + /** Generate the full variable name based on the original name and the ID space. + * + * @param[in] name The name of the variable. + * + * @return The full variable name. + */ + ::std::string generate_variable_name(const char *name) const; + + /** Register the operand to the kernel. + * + * The operand is uniquely owned by the kernel afterward. + * + * @param[in] operand The operand to be registered. + * @param[in] declaring Whether the tile declaration is generated. + */ + void register_operand(OperandBase *operand, bool declaring); + +private: + Kernel *_kernel; + ::std::unique_ptr _impl_attr; + ::std::unique_ptr _impl; + + int32_t _id_space{ 0 }; + int32_t _max_id_space{ 0 }; +}; + +} // namespace ckw + +#endif // CKW_PROTOTYPE_INCLUDE_CKW_KERNELWRITER_H diff --git a/compute_kernel_writer/prototype/include/ckw/OperandBase.h b/compute_kernel_writer/prototype/include/ckw/OperandBase.h new file mode 100644 index 0000000000..f4825e16a7 --- /dev/null +++ b/compute_kernel_writer/prototype/include/ckw/OperandBase.h @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef CKW_PROTOTYPE_INCLUDE_CKW_OPERANDBASE_H +#define CKW_PROTOTYPE_INCLUDE_CKW_OPERANDBASE_H + +#include "ckw/Types.h" +#include + +namespace ckw +{ +namespace prototype +{ +class IGpuKernelWriter; +class Operand; +} // namespace prototype + +/** The base class for all operands. */ +class OperandBase +{ +public: + /** Constructor + * + * @param[in] name The name of the operand. + */ + explicit OperandBase(const ::std::string &name); + + /** Destructor */ + virtual ~OperandBase(); + + /** (Internal use only) Create the implementation operand. + * + * @param[in] writer The implementation kernel writer. + */ + virtual prototype::Operand create_impl_operand(prototype::IGpuKernelWriter *writer) const = 0; + + /** Get the name of the operand. */ + const ::std::string &name() const; + + /** Set the name of the operand. 
*/ + OperandBase &name(const ::std::string &name); + + /** Get the data type of the operand. */ + virtual DataType data_type() const = 0; + + /** Get whether the operand is compile-time constant. */ + virtual bool is_constant() const = 0; + +private: + ::std::string _name; +}; + +} // namespace ckw + +#endif // CKW_PROTOTYPE_INCLUDE_CKW_OPERANDBASE_H diff --git a/compute_kernel_writer/prototype/include/ckw/ScalarValue.h b/compute_kernel_writer/prototype/include/ckw/ScalarValue.h new file mode 100644 index 0000000000..16c3f6d441 --- /dev/null +++ b/compute_kernel_writer/prototype/include/ckw/ScalarValue.h @@ -0,0 +1,137 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef CKW_PROTOTYPE_INCLUDE_CKW_SCALARVALUE_H +#define CKW_PROTOTYPE_INCLUDE_CKW_SCALARVALUE_H + +#include "ckw/Error.h" + +#include + +namespace ckw +{ + +/** The scalar value known at compile-time. */ +class ScalarValue +{ +public: + /** Initialize a new instance of @ref ScalarValue class with integer value 0. */ + ScalarValue() + { + _type = Type::INT; + _value.i64 = 0; + } + + /** Initialize a new instance of @ref ScalarValue class with the specified value. */ + template + ScalarValue(T value) + { + set(value); + } + + /** Set the value. */ + template + void set(T value) + { + CKW_ASSERT(::std::is_integral::value || ::std::is_floating_point::value); + CKW_ASSERT(sizeof(T) <= 8); + + _size = sizeof(T); + + if(::std::is_integral::value) + { + if(::std::is_signed::value) + { + _type = Type::INT; + _value.i64 = value; + } + else + { + _type = Type::UINT; + _value.u64 = value; + } + } + else + { + _type = Type::FLOAT; + _value.f64 = value; + } + } + + /** Get the value. + * + * The caller must make sure that what has been stored in the object must fit + * the output data type without data corruption or loss of accuracy. 
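+     *
+     * For example, a value stored as int32_t can be read back as int64_t, whereas reading it
+     * as int8_t would fail the internal size assertion.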
+ */ + template + T get() const + { + CKW_ASSERT(::std::is_integral::value || ::std::is_floating_point::value); + CKW_ASSERT(sizeof(T) >= _size); + + if(::std::is_integral::value) + { + if(::std::is_signed::value) + { + CKW_ASSERT(_type == Type::INT || _type == Type::UINT); + CKW_ASSERT_IF(_type == Type::UINT, sizeof(T) > _size); + + return _value.i64; + } + else + { + CKW_ASSERT(_type == Type::INT); + + return _value.u64; + } + } + else + { + return _value.f64; + } + } + +private: + union Value + { + int64_t i64; + uint64_t u64; + double f64; + }; + + enum class Type : int32_t + { + UINT, + INT, + FLOAT, + }; + + Value _value{}; + Type _type{}; + uint32_t _size{}; +}; + +} // namespace ckw + +#endif // CKW_PROTOTYPE_INCLUDE_CKW_SCALARVALUE_H diff --git a/compute_kernel_writer/prototype/include/ckw/TensorInfo.h b/compute_kernel_writer/prototype/include/ckw/TensorInfo.h new file mode 100644 index 0000000000..00bb60a444 --- /dev/null +++ b/compute_kernel_writer/prototype/include/ckw/TensorInfo.h @@ -0,0 +1,145 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef CKW_PROTOTYPE_INCLUDE_CKW_TENSORINFO_H +#define CKW_PROTOTYPE_INCLUDE_CKW_TENSORINFO_H + +#include "ckw/Types.h" + +#include +#include + +namespace ckw +{ +/** Compute Kernel Writer tensor data layout (or memory format) */ +enum class TensorDataLayout +{ + Unknown, + Nhwc, + Ndhwc +}; + +/** Compute Kernel Writer tensor data layout component */ +enum class TensorDataLayoutComponent +{ + Unknown, + N, + D, + H, + W, + C, +}; + +/** Compute Kernel Writer tensor component bitmask. The bitmask can be used to retrieve + * the info from @ref TensorComponent. + */ +enum class TensorComponentBitmask : uint32_t +{ + OffsetFirstElement = 0x01000000, // For example, OffsetFirstElement in @ref TensorComponent + Stride = 0x02000000, // For example, stride0 in @ref TensorComponent + Dimension = 0x04000000, // For example, Dim0 in @ref TensorComponent + FoldedDimensions = 0x08000000, // For example, Dim0xDim1 in @ref TensorComponent +}; + +/** Compute Kernel Writer tensor component. The tensor components are used to access specific backend-agnostic tensor arguments, + * such as the tensor dimensions and tensor strides. + * The data type is represented as an integer. The value of the integer value + * is assigned to retrieve the information through the @ref TensorComponentBitmask. 
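+ * For example, Stride1 (0x02000010) carries the Stride bitmask (0x02000000) in its most
+ * significant byte, so masking a value against @ref TensorComponentBitmask reveals which
+ * kind of information it encodes.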
+ */
+enum class TensorComponent : uint32_t
+{
+    Unknown            = 0x00000000,
+    OffsetFirstElement = 0x01000000,
+    Stride0            = 0x02000001,
+    Stride1            = 0x02000010,
+    Stride2            = 0x02000100,
+    Stride3            = 0x02001000,
+    Stride4            = 0x02010000,
+    Dim0               = 0x04000001,
+    Dim1               = 0x04000010,
+    Dim2               = 0x04000100,
+    Dim3               = 0x04001000,
+    Dim4               = 0x04010000,
+    Dim1xDim2          = 0x08000110,
+    Dim2xDim3          = 0x08001100,
+    Dim1xDim2xDim3     = 0x08001110
+};
+
+/** Compute Kernel Writer tensor storage. The tensor storage represents the type of tensor memory object.
+ */
+enum class TensorStorage : uint32_t
+{
+    Unknown            = 0x00000000,
+    BufferUint8Ptr     = 0x01000000,
+    Texture2dReadOnly  = 0x02000001,
+    Texture2dWriteOnly = 0x02000010,
+};
+
+/** Compute Kernel Writer tensor shape
+ *  Negative dimensions can be interpreted as dynamic dimensions by the Compute Kernel Writer
+ */
+using TensorShape = std::array<int32_t, 5>;
+
+/** Compute Kernel Writer tensor info */
+class TensorInfo
+{
+public:
+    /** Constructor
+     *
+     * @param[in] dt    Tensor data type
+     * @param[in] shape Tensor shape
+     * @param[in] dl    Tensor data layout
+     * @param[in] id    Tensor id. The id is used to keep track of the bound user tensor. Through the id,
+     *                  the user can know what tensor has been used by the Compute Kernel Writer.
+     *                  Possible id values:
+     *                  - greater than or equal to 0: bind a user-specific tensor
+     *                  - less than 0: bind a virtual tensor (tile)
+     */
+    TensorInfo(DataType dt, const TensorShape &shape, TensorDataLayout dl, int32_t id);
+    /** Set shape */
+    TensorInfo &shape(const TensorShape &shape);
+    /** Get shape */
+    TensorShape shape() const;
+    /** Set data type */
+    TensorInfo &data_type(DataType dt);
+    /** Get data type */
+    DataType data_type() const;
+    /** Set data layout */
+    TensorInfo &data_layout(TensorDataLayout dl);
+    /** Get data layout */
+    TensorDataLayout data_layout() const;
+    /** Set id */
+    TensorInfo &id(int32_t id);
+    /** Get id */
+    int32_t id() const;
+
+private:
+    TensorShape      _shape{ { 0 } };
+    DataType         _dt{ DataType::Unknown };
+    TensorDataLayout _dl{ TensorDataLayout::Unknown };
+    int32_t          _id{ -1 };
+};
+} // namespace ckw
+
+#endif /* CKW_PROTOTYPE_INCLUDE_CKW_TENSORINFO_H */
diff --git a/compute_kernel_writer/prototype/include/ckw/TensorOperand.h b/compute_kernel_writer/prototype/include/ckw/TensorOperand.h
new file mode 100644
index 0000000000..2fc5044d1c
--- /dev/null
+++ b/compute_kernel_writer/prototype/include/ckw/TensorOperand.h
@@ -0,0 +1,181 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef CKW_PROTOTYPE_INCLUDE_CKW_TENSOROPERAND_H +#define CKW_PROTOTYPE_INCLUDE_CKW_TENSOROPERAND_H + +#include "ckw/OperandBase.h" +#include "ckw/TensorInfo.h" +#include "ckw/TensorTileSampler.h" +#include "ckw/TileOperand.h" +#include "ckw/Types.h" + +#include + +namespace ckw +{ + +class TensorComponentOperand; + +// ================================================================================================= +// TensorOperand +// ================================================================================================= + +/** Tensor operand */ +class TensorOperand : public OperandBase +{ +public: + /** Initialize a new instance of @ref TensorOperand class. + * + * @param[in] name The name of the tensor. + * @param[in] info The tensor info. + */ + TensorOperand(const ::std::string &name, const TensorInfo &info); + + /** No copy constructor. */ + TensorOperand(const TensorOperand &other) = delete; + + /** No copy assignment. */ + TensorOperand &operator=(const TensorOperand &other) = delete; + + /** (Internal use only) Create the implementation operand. + * + * @param[in] writer The implementation kernel writer. + */ + virtual prototype::Operand create_impl_operand(prototype::IGpuKernelWriter *writer) const override; + + /** Get the tensor info. */ + const TensorInfo &info() const; + + /** Get the tensor info. */ + TensorInfo &info(); + + /** Get the data type. */ + virtual DataType data_type() const override; + + /** Get whether the tensor is compile-time constant. */ + virtual bool is_constant() const override; + + /** Get the default tile attached to the tensor. */ + const TileOperand &tile() const; + + /** Get the default tile attached to the tensor. */ + TileOperand &tile(); + + /** Set the default tile attached to the tensor. */ + TensorOperand &tile(TileOperand &tile); + + /** Get the tensor sampler of the default tile. */ + const TensorTileSampler &tile_sampler() const; + + /** Get the tensor sampler of the default tile. */ + TensorTileSampler &tile_sampler(); + + /** Set the tensor sampler of the default tile. */ + TensorOperand &tile_sampler(const TensorTileSampler &value); + + /** Get the operand that contains the stride in y dimension of the tensor. */ + TileOperand &stride1(); + + /** Get the operand that contains the stride in z dimension of the tensor. */ + TileOperand &stride2(); + + /** Get the operand that contains the stride in w dimension of the tensor. */ + TileOperand &stride3(); + + /** Get the operand that contains the stride in w dimension of the tensor. */ + TileOperand &stride4(); + + /** Get the operand that contains the size of dimension 0 of the tensor. */ + TileOperand &dim0(); + + /** Get the operand that contains the size of dimension 1 of the tensor. */ + TileOperand &dim1(); + + /** Get the operand that contains the size of dimension 2 of the tensor. */ + TileOperand &dim2(); + + /** Get the operand that contains the size of dimension 3 of the tensor. */ + TileOperand &dim3(); + + /** Get the operand that contains the size of dimension 4 of the tensor. */ + TileOperand &dim4(); + + /** Get the operand that contains the size of dimensions 1 and 2 collapsed. 
*/ + TileOperand &dim1_dim2(); + + /** Get the operand that contains the size of dimensions 1, 2 and 3 collapsed. */ + TileOperand &dim1_dim2_dim3(); + + /** Get the operand that contains the offset in bytes to the first element. */ + TileOperand &offset_first_element_in_bytes(); + +private: + TensorInfo _info; + + TileOperand *_tile{ nullptr }; + TensorTileSampler _tile_sampler{}; + + ::std::unique_ptr _stride1{ nullptr }; + ::std::unique_ptr _stride2{ nullptr }; + ::std::unique_ptr _stride3{ nullptr }; + ::std::unique_ptr _stride4{ nullptr }; + ::std::unique_ptr _dim0{ nullptr }; + ::std::unique_ptr _dim1{ nullptr }; + ::std::unique_ptr _dim2{ nullptr }; + ::std::unique_ptr _dim3{ nullptr }; + ::std::unique_ptr _dim4{ nullptr }; + ::std::unique_ptr _dim1_dim2{ nullptr }; + ::std::unique_ptr _dim1_dim2_dim3{ nullptr }; + ::std::unique_ptr _offset_first_element_in_bytes{ nullptr }; +}; + +// ================================================================================================= +// TensorComponentOperand +// ================================================================================================= + +/** Tile operand that contains tensor information. */ +class TensorComponentOperand : public TileOperand +{ +public: + /** Initialize a new instance of @ref TensorComponentOperand class. + * + * @param[in] name The name of the operand. + * @param[in] component The tensor info component. + */ + TensorComponentOperand(const ::std::string &name, TensorComponent component); + + /** (Internal use only) Create the implementation operand. + * + * @param[in] writer The implementation kernel writer. + */ + virtual prototype::Operand create_impl_operand(prototype::IGpuKernelWriter *writer) const override; + +private: + TensorComponent _component; +}; + +} // namespace ckw + +#endif // CKW_PROTOTYPE_INCLUDE_CKW_TENSOROPERAND_H diff --git a/compute_kernel_writer/prototype/include/ckw/TensorTileSampler.h b/compute_kernel_writer/prototype/include/ckw/TensorTileSampler.h new file mode 100644 index 0000000000..2ea65bce9e --- /dev/null +++ b/compute_kernel_writer/prototype/include/ckw/TensorTileSampler.h @@ -0,0 +1,163 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef CKW_PROTOTYPE_INCLUDE_CKW_TENSORTILESAMPLER_H +#define CKW_PROTOTYPE_INCLUDE_CKW_TENSORTILESAMPLER_H + +#include "ckw/Types.h" +#include + +namespace ckw +{ + +class TileOperand; + +/** Tensor sampler + * + * It contains information about how the result tile should be stored to tensor memory. + * It can also be used to dictate how the subsequent operators fetch the input tensor. + */ +class TensorTileSampler +{ +public: + /** Initialize a new instance of @ref TensorSampler class. */ + TensorTileSampler(); + + /** Initialize a new instance of @ref TensorSampler class. + * + * @param[in] x The coordinate in the x dimension. + * @param[in] y The coordinate in the y dimension. + * @param[in] z The coordinate in the z dimension. + * @param[in] b The coordinate in the batch dimension. + * @param[in] format The tensor data format. + * @param[in] address_mode_x The address mode of the x dimension. + * @param[in] address_mode_y The address mode of the y dimension. + * @param[in] address_mode_z The address mode of the z dimension. + */ + TensorTileSampler( + TileOperand &x, TileOperand &y, TileOperand &z, TileOperand &b, + TensorSamplerFormat format, + TensorSamplerAddressModeX address_mode_x, + TensorSamplerAddressModeY address_mode_y, + TensorSamplerAddressModeZ address_mode_z); + + /** Initialize a new instance of @ref TensorSampler class. + * + * @param[in] x The coordinate in the x dimension. + * @param[in] y The coordinate in the y dimension. + * @param[in] z The coordinate in the z dimension. + * @param[in] b The coordinate in the batch dimension. + * @param[in] height The height of the tile. + * @param[in] width The width of the tile. + * @param[in] format The tensor data format. + * @param[in] address_mode_x The address mode of the x dimension. + * @param[in] address_mode_y The address mode of the y dimension. + * @param[in] address_mode_z The address mode of the z dimension. + */ + TensorTileSampler( + TileOperand &x, TileOperand &y, TileOperand &z, TileOperand &b, + int32_t height, int32_t width, + TensorSamplerFormat format, + TensorSamplerAddressModeX address_mode_x, + TensorSamplerAddressModeY address_mode_y, + TensorSamplerAddressModeZ address_mode_z); + + /** Get the coordinate in the x dimension. */ + const TileOperand &x() const; + + /** Set the coordinate in the x dimension. */ + TensorTileSampler &x(TileOperand &x); + + /** Get the coordinate in the y dimension. */ + const TileOperand &y() const; + + /** Set the coordinate in the y dimension. */ + TensorTileSampler &y(TileOperand &y); + + /** Get the coordinate in the z dimension. */ + const TileOperand &z() const; + + /** Set the coordinate in the z dimension. */ + TensorTileSampler &z(TileOperand &z); + + /** Get the coordinate in the batch dimension. */ + const TileOperand &b() const; + + /** Set the coordinate in the batch dimension. */ + TensorTileSampler &b(TileOperand &b); + + /** Get the width of the tile. */ + int32_t width() const; + + /** Set the width of the tile. */ + TensorTileSampler &width(int32_t width); + + /** Get the height of the tile. */ + int32_t height() const; + + /** Set the height of the tile. */ + TensorTileSampler &height(int32_t height); + + /** Get the format of the tensor. */ + TensorSamplerFormat format() const; + + /** Set the format of the tensor. */ + TensorTileSampler &format(TensorSamplerFormat format); + + /** Get the address mode of the x dimension. */ + TensorSamplerAddressModeX address_mode_x() const; + + /** Set the address mode of the x-dimension. 
*/ + TensorTileSampler &address_mode_x(TensorSamplerAddressModeX address_mode_x); + + /** Get the address mode of the y dimension. */ + TensorSamplerAddressModeY address_mode_y() const; + + /** Set the address mode of the y dimension. */ + TensorTileSampler &address_mode_y(TensorSamplerAddressModeY address_mode_y); + + /** Get the address mode of the z dimension. */ + TensorSamplerAddressModeZ address_mode_z() const; + + /** Set the address mode of the z dimension. */ + TensorTileSampler &address_mode_z(TensorSamplerAddressModeZ address_mode_z); + +private: + TileOperand *_x{ nullptr }; + TileOperand *_y{ nullptr }; + TileOperand *_z{ nullptr }; + TileOperand *_b{ nullptr }; + + int32_t _height{ 0 }; + int32_t _width{ 0 }; + + TensorSamplerFormat _format{ TensorSamplerFormat::Unknown }; + TensorSamplerAddressModeX _address_mode_x{ TensorSamplerAddressModeX::Unknown }; + TensorSamplerAddressModeY _address_mode_y{ TensorSamplerAddressModeY::Unknown }; + TensorSamplerAddressModeZ _address_mode_z{ TensorSamplerAddressModeZ::Unknown }; +}; + +} // namespace ckw + +#endif // CKW_PROTOTYPE_INCLUDE_CKW_TENSORTILESAMPLER_H diff --git a/compute_kernel_writer/prototype/include/ckw/TileInfo.h b/compute_kernel_writer/prototype/include/ckw/TileInfo.h new file mode 100644 index 0000000000..8fba8bb827 --- /dev/null +++ b/compute_kernel_writer/prototype/include/ckw/TileInfo.h @@ -0,0 +1,84 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef CKW_PROTOTYPE_INCLUDE_CKW_TILEINFO_H +#define CKW_PROTOTYPE_INCLUDE_CKW_TILEINFO_H + +#include "ckw/Types.h" + +#include +#include + +namespace ckw +{ +// Constants to access the tile width and height in the TileShape +constexpr int32_t kTileWidthIdx = 0; +constexpr int32_t kTileHeightIdx = 1; + +/** Compute Kernel Writer tile shape. It is used to define the shape of the tile */ +using TileShape = std::array; + +/** Compute Kernel Writer tile info */ +class TileInfo +{ +public: + /** Constructor used to initialize a scalar variable with a given data type + * + * @param[in] dt Tile data type + */ + TileInfo(DataType dt); + /** Constructor used to initialize a vector with a given data type and vector length. + * + * @param[in] dt Tile data type + * @param[in] w Tile width (or vector length) + */ + TileInfo(DataType dt, int32_t w); + /** Constructor used to initialize a tile with a given data type and tile sizes. 
+ * + * @param[in] dt Tile data type + * @param[in] h Tile height + * @param[in] w Tile width + */ + TileInfo(DataType dt, int32_t h, int32_t w); + /** Set width */ + TileInfo &width(int32_t w); + /** Get width */ + int32_t width() const; + /** Set height */ + TileInfo &height(int32_t h); + /** Get height */ + int32_t height() const; + /** Set data type */ + TileInfo &data_type(DataType dt); + /** Get data type */ + DataType data_type() const; + +private: + DataType _dt{ DataType::Unknown }; + TileShape _shape{}; +}; + +} // namespace ckw + +#endif /* COMPUTE_KERNEL_WRITER_INCLUDE_CKW_TILEINFO_H */ diff --git a/compute_kernel_writer/prototype/include/ckw/TileOperand.h b/compute_kernel_writer/prototype/include/ckw/TileOperand.h new file mode 100644 index 0000000000..c071707a45 --- /dev/null +++ b/compute_kernel_writer/prototype/include/ckw/TileOperand.h @@ -0,0 +1,110 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef CKW_PROTOTYPE_INCLUDE_CKW_TILEOPERAND_H +#define CKW_PROTOTYPE_INCLUDE_CKW_TILEOPERAND_H + +#include "ckw/Error.h" +#include "ckw/OperandBase.h" +#include "ckw/ScalarValue.h" +#include "ckw/TileInfo.h" + +#include + +namespace ckw +{ + +class Kernel; + +/** Tile operand which can be either scalar, vector or 2D tile. */ +class TileOperand : public OperandBase +{ +public: + /** Initialize a new instance of @ref TileOperand class with the tile information. + * + * @param[in] name The name of the tile. + * @param[in] tile_info The tile info. + */ + TileOperand(const ::std::string &name, const TileInfo &tile_info); + + /** Initialize a new instance of @ref TileOperand for scalar variable. + * + * @param[in] name The name of the tile. + * @param[in] data_type The data type of the tile. + */ + TileOperand(const ::std::string &name, DataType data_type); + + /** Initialize a new instance of @ref TileOperand for compile-time constant scalar variable. + * + * @param[in] name The name of the tile. + * @param[in] value The value of the tile. + */ + TileOperand(const ::std::string &name, int32_t value); + + /** Initialize a new instance of @ref TileOperand for compile-time constant scalar variable. + * + * @param[in] name The name of the tile. + * @param[in] value The value of the tile. + */ + TileOperand(const ::std::string &name, float value); + + /** Prohibit copy of tile operand. */ + TileOperand(const TileOperand &) = delete; + + /** Prohibit copy of tile operand. 
*/ + TileOperand &operator=(const TileOperand &) = delete; + + /** (Internal use only) Create the implementation operand. + * + * @param[in] writer The implementation kernel writer. + */ + virtual prototype::Operand create_impl_operand(prototype::IGpuKernelWriter *writer) const override; + + /** Get the tile info. */ + const TileInfo &tile_info() const; + + /** Get the data type of the tile. */ + virtual DataType data_type() const override; + + /** Get whether the tile is compile-time constant. */ + virtual bool is_constant() const override; + + /** Get whether the tile is a scalar value. */ + bool is_scalar() const; + + /** Get the scalar value of the tile. + * + * The tile must have the shape of 1, 1 (i.e. scalar). + */ + ScalarValue scalar_value() const; + +private: + TileInfo _info; + ScalarValue _value{}; + bool _constant; +}; + +} // namespace ckw + +#endif // CKW_PROTOTYPE_INCLUDE_CKW_TILEOPERAND_H diff --git a/compute_kernel_writer/prototype/include/ckw/Types.h b/compute_kernel_writer/prototype/include/ckw/Types.h new file mode 100644 index 0000000000..bb5d7ce077 --- /dev/null +++ b/compute_kernel_writer/prototype/include/ckw/Types.h @@ -0,0 +1,140 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef CKW_PROTOTYPE_INCLUDE_CKW_TYPES_H +#define CKW_PROTOTYPE_INCLUDE_CKW_TYPES_H + +#include +#include + +namespace ckw +{ + +/** Compute Kernel Writer data types. This data type is used by the code variables and tensor arguments. */ +enum class DataType +{ + Unknown = 0x00, + Fp32 = 0x11, + Fp16 = 0x12, + Int32 = 0x21, + Int16 = 0x22, + Int8 = 0x24, + Uint32 = 0x31, + Uint16 = 0x32, + Uint8 = 0x34, + Bool = 0x41 +}; + +enum class GpuTargetLanguage +{ + Unknown, + OpenCL +}; + +/* Binary operations +*/ +enum class BinaryOp : int32_t +{ + // Elementwise + Add = 0x0000, // + + Sub = 0x0001, // - + Mul = 0x0002, // * + Div = 0x0003, // / + Mod = 0x0004, // % + // Relational + Equal = 0x1000, // == + Less = 0x1001, // < + LessEqual = 0x1002, // <= + Greater = 0x1003, // > + GreaterEqual = 0x1004, // >= + // Algebra + MatMul_Nt_Nt = 0x2000, // X + MatMul_Nt_T = 0x2001, // X + MatMul_T_Nt = 0x2002, // X + MatMul_T_T = 0x2003, // X + Dot = 0x2004, // . + // Logical + LogicalAnd = 0x3000, // && + LogicalOr = 0x3001, // || + LogicalNot = 0x3002 // ! 
+}; + +enum class AssignmentOp : int32_t +{ + // Unary + Increment = 0x0000, // += + Decrement = 0x0001, // -= +}; + +enum class ScalarUnaryFunction : int32_t +{ + Exp, +}; + +enum class TensorSamplerFormat : int32_t +{ + Unknown = 0, + C_WH_1 = 1, + C_W_H = 2 +}; + +enum class TensorSamplerAddressModeX : int32_t +{ + Unknown = 0, + None = 1, // The user guarantees that the X coordinate is always in-bound + OverlappingMin = 2 // (FIXED shapes only) Reduce the load/store length when x == 0 (MIN). The load length will be width % original length + // Leftover elements can be handled using overlapping. This involves processing some of the elements in the array twice. +}; + +enum class TensorSamplerAddressModeY : int32_t +{ + Unknown = 0, + None = 1, // The user guarantees that the Y coordinate is always in-bound + OverlappingMin = 2, // (FIXED shapes only) Reduce the load/store length when x == 0 (MIN). The load length will be width % original length + Skip = 3, // Skip the read/write + SkipMinEdgeOnly = 4, // Skip greater than or equal to max only. The user guarantees that the Y coordinate is always >= 0 + SkipMaxEdgeOnly = 5, // Skip less than 0 only + ClampToNearest = 6, // Clamp the coordinate to nearest edge (0 or max value allowed on Y) + ClampToMinEdgeOnly = 7, // Clamp the negative coordinate to 0 only. Therefore, we expect Y to be always < MAX + ClampToMaxEdgeOnly = 8, // Clamp the coordinate to the max value allowed on Y only. We expect Y to be always >= 0 + ClampToBorder = 9, // Clamp to border which always has 0 value + ClampToBorderMinEdgeOnly = 10, + ClampToBorderMaxEdgeOnly = 11 +}; + +enum class TensorSamplerAddressModeZ : int32_t +{ + Unknown = 0, + None = 1, // The user guarantees that the Y coordinate is always in-bound + Skip = 3, // Skip the read/write + SkipMinEdgeOnly = 4, // Skip greater than or equal to max only. The user guarantees that the Y coordinate is always >= 0 + SkipMaxEdgeOnly = 5, // Skip less than 0 only + ClampToNearest = 6, // Clamp the coordinate to nearest edge (0 or max value allowed on Y) + ClampToMinEdgeOnly = 7, // Clamp the negative coordinate to 0 only. Therefore, we expect Y to be always < MAX + ClampToMaxEdgeOnly = 8, // Clamp the coordinate to the max value allowed on Y only. We expect Y to be always >= 0 +}; + +} // namespace ckw + +#endif // CKW_PROTOTYPE_INCLUDE_CKW_TYPES_H diff --git a/compute_kernel_writer/prototype/src/Kernel.cpp b/compute_kernel_writer/prototype/src/Kernel.cpp new file mode 100644 index 0000000000..bbf5c440a7 --- /dev/null +++ b/compute_kernel_writer/prototype/src/Kernel.cpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "ckw/Kernel.h"
+#include "ckw/Types.h"
+#include "src/Prototype.h"
+
+namespace ckw
+{
+
+Kernel::Kernel(const char *name, GpuTargetLanguage language)
+    : _name(name), _kernel(std::make_unique<prototype::GpuKernelWriterDataHolder>(language)), _operands{}
+{
+}
+
+Kernel::~Kernel()
+{
+}
+
+const std::string &Kernel::name() const
+{
+    return _name;
+}
+
+const std::map<std::string, std::unique_ptr<OperandBase>> &Kernel::operands() const
+{
+    return _operands;
+}
+
+std::map<std::string, std::unique_ptr<OperandBase>> &Kernel::operands()
+{
+    return _operands;
+}
+
+prototype::GpuKernelWriterDataHolder *Kernel::impl()
+{
+    return _kernel.get();
+}
+
+} // namespace ckw
diff --git a/compute_kernel_writer/prototype/src/KernelWriter.cpp b/compute_kernel_writer/prototype/src/KernelWriter.cpp
new file mode 100644
index 0000000000..5d79985e87
--- /dev/null
+++ b/compute_kernel_writer/prototype/src/KernelWriter.cpp
@@ -0,0 +1,227 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT.
+ */ + +#include "ckw/KernelWriter.h" +#include "ckw/Error.h" +#include "ckw/TensorOperand.h" +#include "src/Prototype.h" + +#include + +namespace ckw +{ + +namespace +{ + +inline prototype::TensorInfo create_impl_tensor_info(const TensorInfo &info) +{ + return prototype::TensorInfo{ info.shape(), info.data_type(), info.data_layout(), info.id() }; +} + +} // namespace + +// ================================================================================================= +// Constructors and destructor +// ================================================================================================= + +KernelWriter::KernelWriter(Kernel &kernel) + : _kernel(&kernel), + _impl_attr(std::make_unique()), + _impl(prototype::GpuKernelWriterFactory::create(_impl_attr.get(), kernel.impl())) +{ + _impl->set_IdSpace(1); +} + +KernelWriter::~KernelWriter() +{ +} + +// ================================================================================================= +// Scope management +// ================================================================================================= + +int32_t KernelWriter::id_space() const +{ + return _id_space; +} + +KernelWriter &KernelWriter::id_space(int32_t id_space) +{ + CKW_ASSERT(id_space <= _max_id_space); + + _id_space = id_space; + return *this; +} + +int32_t KernelWriter::next_id_space() +{ + id_space(++_max_id_space); + return _id_space; +} + +// ================================================================================================= +// Tensor and tile declaration +// ================================================================================================= + +TensorOperand &KernelWriter::create_tensor_argument(const char *name, const TensorInfo &info) +{ + const auto var_name = generate_variable_name(name); + + _impl->declare_argument(var_name, create_impl_tensor_info(info)); + + auto operand = new TensorOperand(var_name, info); + register_operand(operand, false); + + return *operand; +} + +TileOperand &KernelWriter::create_tile_argument(const char *name, int32_t value) +{ + const auto var_name = generate_variable_name(name); + + auto operand = new TileOperand(var_name, value); + register_operand(operand, false); + + return *operand; +} + +std::string KernelWriter::generate_variable_name(const char *name) const +{ + std::stringstream var_name; + + var_name << "_" << _id_space << "_" << name; + + return var_name.str(); +} + +void KernelWriter::register_operand(OperandBase *operand, bool declaring) +{ + const auto &name = operand->name(); + auto &operands = _kernel->operands(); + + CKW_ASSERT(operands.find(name) == operands.end()); + operands[name] = std::unique_ptr(operand); + + if(declaring && !operand->is_constant()) + { + const auto tile = reinterpret_cast(operand); + + const auto &info = tile->tile_info(); + _impl->declare_tile(tile->name(), prototype::TileInfo(info.data_type(), info.width(), info.height())); + } +} + +// ================================================================================================= +// Load and store +// ================================================================================================= + +void KernelWriter::op_load(TileOperand &tile, TensorOperand &tensor, const TensorTileSampler &sampler) +{ + prototype::TensorOperand impl_tensor( + tensor.name(), + prototype::GpuSampler{ + sampler.format(), + prototype::GpuSamplerTensorStorage::BufferUint8Ptr, + sampler.address_mode_x(), + sampler.address_mode_y(), + sampler.address_mode_z() }); + + auto impl_x = 
sampler.x().create_impl_operand(_impl.get()); + auto impl_y = sampler.y().create_impl_operand(_impl.get()); + auto impl_z = sampler.z().create_impl_operand(_impl.get()); + auto impl_b = sampler.b().create_impl_operand(_impl.get()); + + auto impl_dst = tile.create_impl_operand(_impl.get()); + + _impl->op_load_immediate(impl_tensor, impl_dst, impl_x, impl_y, impl_z, impl_b); +} + +void KernelWriter::op_store(TensorOperand &tensor, const TileOperand &tile, const TensorTileSampler &sampler) +{ + prototype::TensorOperand impl_tensor( + tensor.name(), + prototype::GpuSampler{ + sampler.format(), + prototype::GpuSamplerTensorStorage::BufferUint8Ptr, + sampler.address_mode_x(), + sampler.address_mode_y(), + sampler.address_mode_z() }); + auto impl_src = tile.create_impl_operand(_impl.get()); + auto impl_x = sampler.x().create_impl_operand(_impl.get()); + auto impl_y = sampler.y().create_impl_operand(_impl.get()); + auto impl_z = sampler.z().create_impl_operand(_impl.get()); + auto impl_b = sampler.b().create_impl_operand(_impl.get()); + + _impl->op_store_immediate(impl_tensor, impl_src, impl_x, impl_y, impl_z, impl_b); +} + +// ================================================================================================= +// Data processing +// ================================================================================================= + +void KernelWriter::op_assign(TileOperand &dst, const TileOperand &src) +{ + auto impl_dst = dst.create_impl_operand(_impl.get()); + auto impl_src = src.create_impl_operand(_impl.get()); + + _impl->op_assign(impl_dst, impl_src); +} + +void KernelWriter::op_binary_expression(TileOperand &dst, const TileOperand &lhs, const TileOperand &rhs, BinaryOp op) +{ + auto impl_lhs = lhs.create_impl_operand(_impl.get()); + auto impl_rhs = rhs.create_impl_operand(_impl.get()); + auto impl_dst = dst.create_impl_operand(_impl.get()); + + _impl->op_binary_expression(impl_dst, impl_lhs, op, impl_rhs); +} + +void KernelWriter::op_scalar_function(TileOperand &dst, const TileOperand &src, ScalarUnaryFunction opcode) +{ + auto impl_dst = dst.create_impl_operand(_impl.get()); + auto impl_src = src.create_impl_operand(_impl.get()); + + _impl->op_scalar_function(impl_dst, impl_src, opcode); +} + +// ================================================================================================= +// Misc +// ================================================================================================= + +void KernelWriter::op_get_global_id(TileOperand &dst, int32_t dim) +{ + _impl->op_get_global_id(prototype::Operand(dst.name()), dim); +} + +// ================================================================================================= +// Code generation +// ================================================================================================= + +std::string KernelWriter::generate_code() +{ + return prototype::generate_code(*_kernel->impl(), _kernel->name()); +} + +} // namespace ckw diff --git a/compute_kernel_writer/prototype/src/OperandBase.cpp b/compute_kernel_writer/prototype/src/OperandBase.cpp new file mode 100644 index 0000000000..59cf846cc7 --- /dev/null +++ b/compute_kernel_writer/prototype/src/OperandBase.cpp @@ -0,0 +1,50 @@ +/* + * Copyright (c) 2023 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ckw/OperandBase.h" + +namespace ckw +{ + +OperandBase::OperandBase(const std::string &name) + : _name(name) +{ +} + +OperandBase::~OperandBase() +{ +} + +const std::string &OperandBase::name() const +{ + return _name; +} + +OperandBase &OperandBase::name(const std::string &name) +{ + _name = name; + return *this; +} + +} // namespace ckw diff --git a/compute_kernel_writer/prototype/src/Prototype.h b/compute_kernel_writer/prototype/src/Prototype.h new file mode 100644 index 0000000000..b17481537f --- /dev/null +++ b/compute_kernel_writer/prototype/src/Prototype.h @@ -0,0 +1,3767 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef CKW_PROTOTYPE_SRC_PROTOTYPE_H +#define CKW_PROTOTYPE_SRC_PROTOTYPE_H + +#include +#include +#include +#include // int32_t +#include // cout (to be removed) +#include // assert (to be removed) +#include +#include +#include +#include +#include +#include +#include + +#include "ckw/Types.h" +#include "ckw/TensorInfo.h" +#include "ckw/Error.h" + +namespace ckw +{ +namespace prototype { + +// Dummy data structure for Size2D +using Size2D = std::vector; + +// Dummy Status +using Status = void; + +enum class ComponentType : int32_t +{ + Complex = 0, + Simple = 1, + Unfusable = 2 +}; + +enum class GpuCompilationSpeed +{ + Fast = 0x00, // fast compilation may increase the latency of the network + Slow = 0x01 // slow compilation may decrease the latency of the network +}; + +enum class GpuExtensions +{ + Fp16, + Dot8, + Mmul, + FastMath +}; + +struct TensorInfo +{ + TensorShape shape { {0} }; + DataType data_type { DataType::Unknown }; + TensorDataLayout data_layout { TensorDataLayout::Nhwc }; + int32_t id { -1 }; +}; + +struct ComponentAttribute +{ + GpuCompilationSpeed compilation_speed {GpuCompilationSpeed::Fast}; + bool overwrite_tile { true }; +}; + +inline std::string data_type_to_cl_type(DataType dt) +{ + switch(dt) + { + case DataType::Fp32: + return "float"; + case DataType::Fp16: + return "half"; + case DataType::Int8: + return "char"; + case DataType::Uint8: + return "uchar"; + case DataType::Uint16: + return "ushort"; + case DataType::Int16: + return "short"; + case DataType::Uint32: + return "uint"; + case DataType::Int32: + return "int"; + case DataType::Bool: + return "bool"; + default: + assert(false); + return ""; + } +} + +inline int32_t width_to_cl_vector_size(int32_t width) +{ + switch(width) + { + case 1: + return 1; + case 2: + return 2; + case 3: + return 3; + case 4: + return 4; + case 5: + case 6: + case 7: + case 8: + return 8; + case 9: + case 10: + case 11: + case 12: + case 13: + case 14: + case 15: + case 16: + return 16; + default: + assert(false); + return 0; + } +} + +inline std::string get_cl_data_type(DataType dt, int32_t width) +{ + std::string data_type; + int32_t w = width_to_cl_vector_size(width); + data_type += data_type_to_cl_type(dt); + if(w != 1) + { + data_type += std::to_string(w); + } + return data_type; +} + +inline std::string to_opencl_store(int32_t vector_length) +{ + if(vector_length != 1) + { + return "vstore" + std::to_string(vector_length) + "("; + } + else + { + return "*("; + } +} + +struct TileInfo +{ + TileInfo() {} + TileInfo(DataType dt) : dt(dt), w(1), h(1) {} + TileInfo(DataType dt, int32_t width) : dt(dt), w(width), h(1) {} + TileInfo(DataType dt, int32_t width, int32_t height) : dt(dt), w(width), h(height) {} + DataType dt{ DataType::Unknown }; // Data type of the tile + int32_t w{ 0 }; // Width (i.e. c0 - portion of the channels) + int32_t h{ 0 }; // Height (i.e. s0 - portion of the spatial dimensions) +}; + +inline std::ostream& operator << (std::ostream& o, const TileInfo& a) +{ + o << a.w << " x " << a.h; + return o; +} + +struct DataTypeAsString +{ + std::string str { "" }; + DataType dt { DataType::Unknown }; + int32_t size { 1 }; +}; + +struct ValueAsString +{ + std::string str { "" }; + DataTypeAsString type { }; +}; + +// https://stackoverflow.com/questions/51515378/storing-and-accessing-tile-properties-in-c +// A Tile is a collection of variables used to express a 2D data. 
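// --------------------------------------------------------------------------------------
// Editorial sketch (not part of the patch): what the string helpers above produce,
// assuming only the definitions shown in this header. The function name
// check_cl_type_helpers_sketch() is illustrative and does not exist in the library.
// --------------------------------------------------------------------------------------
inline void check_cl_type_helpers_sketch()
{
    // get_cl_data_type() appends the rounded-up vector width to the scalar CL type name.
    assert(get_cl_data_type(DataType::Fp16, 3) == "half3");
    assert(get_cl_data_type(DataType::Fp32, 6) == "float8"); // widths 5-8 round up to 8
    assert(get_cl_data_type(DataType::Int32, 1) == "int");   // no suffix for a scalar

    // to_opencl_store() selects between a vstoreN call and a plain pointer dereference.
    assert(to_opencl_store(4) == "vstore4(");
    assert(to_opencl_store(1) == "*(");
}
// The tile abstractions below build their OpenCL variable names and types on top of
// these helpers.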
+class IScalarTile +{ +public: + virtual ~IScalarTile() = default; + /** Method to get the scalar variable from a tile + * @param[in] x X coordinate on the width of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge + * @param[in] y Y coordinate on the height of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge + * + * @return the scalar variable as a string + */ + virtual ValueAsString scalar(int32_t x, int32_t y) const = 0; + /** Method to get the list of underlying variable names used by the tile + * + * @return the list of variable names + */ + virtual std::vector underlying_source_variables() const = 0; + /** Method to get the name of the tile. + * + * @return the name of the tile + */ + std::string name() const + { + return _basename; + } + /** Method to get the tile format + * + * @return the format + */ + TileInfo format() const + { + return _format; + } + /** Method to know whether the tile is assignable or not (constant) + * + * @return true if the tile is assignable + */ + virtual bool is_assignable() const = 0; + /** Method to know whether the tile needs to be declared + * + * @return true if the tile needs to be declared in the code before being used + */ + virtual bool need_declaration() const = 0; +protected: + TileInfo _format { }; // Tile format + std::string _basename { "" }; // Tile name +}; + +// A tile is a collection of variables used to express a 2D data. The variables are vectors in the GPU context. +// The vector size is given by the width of the tile. The number of vectors height by depth defines the number of vectors +class IVectorTile : public IScalarTile +{ +public: + virtual ~IVectorTile() = default; + /** Method to get the vector variable from a tile. A vector is an ordered homogeneous collection of two or more scalars. + * The user can query the list of supported width for the vectors through preferred_vector_sizes(). + * + * @param[in] y Y coordinate on the height of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge + * + * @return the vector variable as a string + */ + virtual ValueAsString vector(int32_t y) const = 0; + /** Method to get a vector variable from a tile. A vector is an ordered homogeneous collection of two or more scalars. + * + * @return the vector variable as a string + */ + virtual ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const = 0; + /** Method to get the preferred vector sizes. 
+ * + * @return a vector with the preferred vector sizes + */ + //virtual std::vector preferred_vector_sizes() const = 0; +}; + +class ClTile : public IVectorTile +{ +public: + ClTile(const std::string& name, TileInfo format) + { + _format = format; + _basename = name; + } + + ValueAsString scalar(int32_t x, int32_t y) const override + { + x = std::max(std::min(x, _format.w - 1), static_cast(0)); + y = std::max(std::min(y, _format.h - 1), static_cast(0)); + + ValueAsString t; + t.str = build_variable_name(y); + t.type.str = get_cl_data_type(_format.dt, 1); + t.type.dt = _format.dt; + t.type.size = 1; + + // Check required because if the width has only one element, we cannot use .s0 + if(_format.w != 1) + { + // Automatic broadcasting + t.str += ".s" + std::to_string(x); + } + + return t; + } + + ValueAsString vector(int32_t y) const override + { + y = std::max(std::min(y, _format.h - 1), static_cast(0)); + + ValueAsString t; + t.str = build_variable_name(y); + t.type.str = get_cl_data_type(_format.dt, _format.w); + t.type.dt = _format.dt; + t.type.size = _format.w; + return t; + } + + ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const override + { + y = std::max(std::min(y, _format.h - 1), static_cast(0)); + + ValueAsString t; + t.str = build_variable_name(y); + t.type.str = get_cl_data_type(_format.dt, width); + t.type.dt = _format.dt; + t.type.size = width; + + if(_format.w != 1) + { + t.str += ".s"; + for(int i = 0; i < width; ++i) + { + t.str += to_scalar_hex(x_start + i); + } + } + return t; + } + + std::vector underlying_source_variables() const override + { + std::vector vars; + for(int32_t y = 0; y < _format.h; ++y) + { + ValueAsString t; + t.str = build_variable_name(y); + t.type.str = get_cl_data_type(_format.dt, _format.w); + t.type.dt = _format.dt; + t.type.size = _format.w; + vars.push_back(t); + } + return vars; + } + + bool is_assignable() const override + { + return true; + } + + bool need_declaration() const override + { + return true; + } + +private: + std::string build_variable_name(int32_t y) const + { + std::string var_name = _basename; + + if(_format.h == 1) + { + return var_name; + + } + else + { + var_name += "_"; + var_name += std::to_string(y); + } + + return var_name; + } + + std::string to_scalar_hex(int32_t x) const + { + switch(x) + { + case 0: + case 1: + case 2: + case 3: + case 4: + case 5: + case 6: + case 7: + case 8: + case 9: + return std::to_string(x); + case 10: + return "A"; + case 11: + return "B"; + case 12: + return "C"; + case 13: + return "D"; + case 14: + return "E"; + case 15: + return "F"; + default: + std::cout << "Unsupported hexadecimal value" << std::endl; + assert(false); + return ""; + } + } +}; + +// Unique features: It contains values in the form of string. The name used for this object is misleading since the variables can change the value over time. 
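// --------------------------------------------------------------------------------------
// Editorial sketch (not part of the patch): how ClTile (declared above) maps tile
// coordinates to OpenCL variable names, assuming only the code shown in this header.
// The function name check_cl_tile_naming_sketch() is illustrative only; the
// ClConstantTile class that follows instead embeds literal value strings.
// --------------------------------------------------------------------------------------
inline void check_cl_tile_naming_sketch()
{
    // A 4 (width) x 2 (height) fp32 tile is backed by one CL vector per row: dst_0, dst_1.
    ClTile tile("dst", TileInfo(DataType::Fp32, 4, 2));

    assert(tile.scalar(1, 0).str == "dst_0.s1");     // column 1 of row 0, type "float"
    assert(tile.vector(1).str == "dst_1");           // whole row 1, type "float4"
    assert(tile.vector(0, 2, 1).str == "dst_1.s01"); // columns 0-1 of row 1, type "float2"
}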
+class ClConstantTile : public IVectorTile +{ +public: + ClConstantTile(const std::vector> &in, DataType dt) + { + _format.w = in[0].size(); + _format.h = in.size(); + _format.dt = dt; + + _data = std::vector>(_format.h, std::vector(_format.w)); + + for(int32_t y = 0; y < _format.h; ++y) + { + for(int32_t x = 0; x < _format.w; ++x) + { + _data[y][x] = in[y][x]; + } + } + } + + ValueAsString scalar(int32_t x, int32_t y) const override + { + x = std::max(std::min(x, _format.w - 1), static_cast(0)); + y = std::max(std::min(y, _format.h - 1), static_cast(0)); + + ValueAsString t; + t.str = _data[y][x]; + t.type.str = get_cl_data_type(_format.dt, 1); + t.type.dt = _format.dt; + t.type.size = 1; + + return t; + } + + ValueAsString vector(int32_t y) const override + { + y = std::max(std::min(y, _format.h - 1), static_cast(0)); + + return vector(0, _format.w, y); + } + + ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const override + { + y = std::max(std::min(y, _format.h - 1), static_cast(0)); + + ValueAsString t; + t.str = ""; + t.type.str = get_cl_data_type(_format.dt, width); + t.type.dt = _format.dt; + t.type.size = width; + + if(width > 1) + { + t.str += "((" + get_cl_data_type(_format.dt, width) + ")("; + } + + int32_t x = x_start; + for(; x < width - 1; ++x) + { + t.str += scalar(x, y).str; + t.str += ", "; + } + t.str += scalar(x, y).str; + + if(width > 1) + { + t.str += "))"; + } + + return t; + } + + std::vector underlying_source_variables() const override + { + std::vector vars; + + for(int32_t y = 0; y < _format.h; ++y) + { + for(int32_t x = 0; x < _format.w; ++x) + { + ValueAsString t; + t.str = _data[y][x]; + t.type.str = get_cl_data_type(_format.dt, 1); + t.type.dt = _format.dt; + t.type.size = 1; + vars.push_back(t); + } + } + + return vars; + } + + bool is_assignable() const override + { + return false; + } + + bool need_declaration() const override + { + return false; + } + +private: + std::vector> _data{}; +}; + +enum class TensorComponentIndex : int32_t +{ + IndexMask = 0x0000000f, +}; + +enum class TensorComponentType : int32_t +{ + OffsetFirstElement = 0x00000100, + Stride = 0x00001000, + Dimension = 0x00010000, + FoldedDimension = 0x00100000, + Constant = 0x01000000 +}; + +enum class TensorComponent : int32_t +{ + Unknown = 0x00000000, + OffsetFirstElement = 0x00000100, + Stride1 = 0x00001001, + Stride2 = 0x00001002, + Stride3 = 0x00001003, + Stride4 = 0x00001004, + Dim0 = 0x00010000, + Dim1 = 0x00010001, + Dim2 = 0x00010002, + Dim3 = 0x00010003, + Dim4 = 0x00010004, + C = 0x00010000, // Dim0 + W = 0x00010001, // Dim1 + H = 0x00010002, // Dim2 + D = 0x00010003, + N = 0x00010004, + Dim1xDim2 = 0x00100021, + Dim1xDim2xDim3 = 0x00100321, + WxH = 0x00100021, + WxHxD = 0x00100321 +}; + +inline std::string to_string(TensorComponent x) +{ + switch(x) + { + case TensorComponent::Unknown: + return "Unknown"; + case TensorComponent::OffsetFirstElement: + return "OffsetFirstElement"; + case TensorComponent::Stride1: + return "Stride1"; + case TensorComponent::Stride2: + return "Stride2"; + case TensorComponent::Stride3: + return "Stride3"; + case TensorComponent::Stride4: + return "Stride4"; + case TensorComponent::Dim0: + return "Dim0"; + case TensorComponent::Dim1: + return "Dim1"; + case TensorComponent::Dim2: + return "Dim2"; + case TensorComponent::Dim3: + return "Dim3"; + case TensorComponent::Dim4: + return "Dim4"; + case TensorComponent::Dim1xDim2: + return "Dim1xDim2"; + case TensorComponent::Dim1xDim2xDim3: + return "Dim1xDim2xDim3"; + default: + 
assert(false); + } +} + +class ITensorArgument +{ +public: + virtual ~ITensorArgument() = default; + /** Method to get the tensor component as a string + * + * @param[in] x tensor component to query + * + * @return the tensor component as a string + */ + virtual std::string component(TensorComponent x) = 0; + /** Method to get the tensor component type declaration as a string + * + * @return the tensor component type declaration as a string + */ + virtual std::string component_type_declaration() const = 0; + /** Method to get the tensor component data type + * + * @return the tensor component data type + */ + virtual DataType component_data_type() const = 0; + /** Method to get the tensor component declarations + * + * @return a vector containing the tensor component declarations + */ + virtual std::vector component_declarations() const = 0; + /** Method to get the name of the tensor argument. + * + * @return the name of the tensor argument + */ + std::string name() const + { + return _basename; + } + /** Method to get the tensor format + * + * @return the format + */ + TensorInfo format() const + { + return _format; + } + +protected: + TensorInfo _format { }; + std::string _basename {}; +}; + +enum class GpuTensorStorage : int32_t +{ + Unknown = 0x0000, + BufferUint8Ptr = 0x0012, + Image2dReadOnly = 0x0020, + Image2dWriteOnly = 0x0021, + Image3dReadOnly = 0x0030, + Image3dWriteOnly = 0x0031 +}; + +class IGpuTensorArgument : public ITensorArgument +{ +public: + virtual ~IGpuTensorArgument() = default; + /** Method to get the tensor storage, which is the underlying storage used to keep the data memory + * + * @param[in] x tensor storage to query + * + * @return the tensor storage as a string + */ + virtual std::string storage(GpuTensorStorage x) = 0; + /** Method to get the tensor storage type declaration as a string + * + * @param[in] x tensor component to query + * + * @return the tensor storage type declaration as a string + */ + virtual std::string storage_type_declaration(GpuTensorStorage x) const = 0; + /** Method to get the tensor storage declarations + * + * @return a vector containing the tensor storage declarations + */ + virtual std::vector storage_declarations() const = 0; +}; + +class ClTensorArgument : public IGpuTensorArgument +{ +public: + ClTensorArgument(const std::string& name, const TensorInfo& x, bool return_by_value_when_possible) + { + _basename = name; + _format = x; + _return_by_value_when_possible = return_by_value_when_possible; + } + + // Methods to override + std::string component(TensorComponent x) override + { + if((static_cast(x) & static_cast(TensorComponentType::Constant))) + { + int32_t idx = static_cast(x) & static_cast(TensorComponentIndex::IndexMask); + return std::to_string(idx - 1); + } + + if(_return_by_value_when_possible) + { + if((static_cast(x) & static_cast(TensorComponentType::Dimension))) + { + int32_t idx = static_cast(x) & static_cast(TensorComponentIndex::IndexMask); + return std::to_string(_format.shape[idx]); + } + + if((static_cast(x) & static_cast(TensorComponentType::FoldedDimension))) + { + switch(x) + { + case TensorComponent::Dim1xDim2: + return std::to_string(_format.shape[1] * _format.shape[2]); + case TensorComponent::Dim1xDim2xDim3: + return std::to_string(_format.shape[1] * _format.shape[2] * _format.shape[2]); + default: + std::cout << "Unsupported folded dimension" << std::endl; + assert(false); + } + } + } + + if(std::find(_components_required.begin(), _components_required.end(), x) == _components_required.end()) + { + 
_components_required.push_back(x); + } + + return build_component_name(x); + } + + std::string component_type_declaration() const override + { + return "int"; + }; + + DataType component_data_type() const override + { + return DataType::Int32; + } + + std::string storage(GpuTensorStorage x) override + { + if(std::find(_storage_required.begin(), _storage_required.end(), x) == _storage_required.end()) + { + _storage_required.push_back(x); + } + + return build_storage_name(x); + } + + std::string storage_type_declaration(GpuTensorStorage x) const override + { + switch(x) + { + case GpuTensorStorage::BufferUint8Ptr: + return "__global uchar*"; + case GpuTensorStorage::Image2dReadOnly: + return "__read_only image2d_t"; + case GpuTensorStorage::Image2dWriteOnly: + return "__write_only image2d_t"; + case GpuTensorStorage::Image3dReadOnly: + return "__read_only image3d_t "; + case GpuTensorStorage::Image3dWriteOnly: + return "__write_only image3d_t "; + default: + std::cout << "Unsupported storage" << std::endl; + assert(false); + return ""; + } + }; + + std::vector storage_declarations() const override + { + return _storage_required; + } + + std::vector component_declarations() const override + { + return _components_required; + } + +private: + std::string build_storage_name(GpuTensorStorage x) const + { + std::string var_name = _basename; + + switch(x) + { + case GpuTensorStorage::BufferUint8Ptr: + return var_name + "_ptr"; + case GpuTensorStorage::Image2dReadOnly: + case GpuTensorStorage::Image2dWriteOnly: + return var_name + "_img2d"; + case GpuTensorStorage::Image3dReadOnly: + case GpuTensorStorage::Image3dWriteOnly: + return var_name + "_img3d"; + default: + std::cout << "Unsupported storage" << std::endl; + assert(false); + } + + return var_name; + } + + std::string build_component_name(TensorComponent x) const + { + std::string var_name = _basename; + + switch(x) + { + case TensorComponent::OffsetFirstElement: + return var_name + "_offset_first_element"; + case TensorComponent::Stride1: + return var_name + "_stride1"; + case TensorComponent::Stride2: + return var_name + "_stride2"; + case TensorComponent::Stride3: + return var_name + "_stride3"; + case TensorComponent::Dim0: + return var_name + "_dim0"; + case TensorComponent::Dim1: + return var_name + "_dim1"; + case TensorComponent::Dim2: + return var_name + "_dim2"; + case TensorComponent::Dim3: + return var_name + "_dim3"; + case TensorComponent::Dim1xDim2: + return var_name + "_dim1xdim2"; + case TensorComponent::Dim1xDim2xDim3: + return var_name + "_dim1xdim2xdim3"; + default: + std::cout << "Unsupported component" << std::endl; + assert(false); + } + + return var_name; + } + + bool _return_by_value_when_possible { false }; + std::vector _storage_required {}; + std::vector _components_required {}; +}; + +/** + * @brief Data structure that contains the declared tiles by the components. + * The registry is a linear data structure that follows the similar principle of the stack. The user can use the @p increment_registry_level() method to + * increase the level of the stack (0 when it starts). When the user uses the @p decrement_registry_level() method, the registry decreases the level of the stack + * and remove (pop) all the tiles from the level above. + * When a tile is declared on the level 0, it is a global tile. A global tile is visible in all parts of the code. 
+ * Since different components may use the same name to define a tile, the registry adopts the IdSpace concept, an @p id to prevent name collisions + * when declaring tiles among different components. + * + */ +class GpuTileRegistry +{ +public: +enum class RegistryTileType +{ + Tile, + Link +}; + +using RegistryIdSpace = int32_t; +using RegistryLevel = int32_t; +using RegistryTileName = std::string; + +struct RegistryTileTableEntry +{ + RegistryLevel registry_level { 0 }; + std::unique_ptr tile_object { nullptr }; +}; + +struct RegistryTileTypeTableEntry +{ + RegistryTileType tile_type { RegistryTileType::Tile }; + RegistryTileName tile_name {}; + RegistryIdSpace registry_idspace { 0 }; + RegistryLevel registry_level { 0 }; +}; + +using RegistryTileTable = std::map>; +using RegistryTileTypeTable = std::map>; + /** + * @brief Construct a new Gpu Tile Registry object + * + */ + GpuTileRegistry() + { + _language = GpuTargetLanguage::Unknown; + } + /** + * @brief Construct a new Gpu Tile Registry object providing the Gpu programming language + * + * @param[in] language Gpu programming language to use + */ + GpuTileRegistry(GpuTargetLanguage language) + { + _language = language; + } + /** + * @brief Default destructor. Destroy the Gpu Tile Registry object + * + */ + ~GpuTileRegistry() = default; + /** + * @brief Set the working IdSpace for the tile registry. IdSpace is used to prevent name collisions when declaring tiles. + * Therefore, the IdSpace should be set before declaring any tiles. + * + * @param[in] id The IdSpace id + */ + void set_IdSpace(int32_t id) + { + _IdSpace = id; + } + /** + * @brief Get the current working IdSpace for the tile registry. IdSpace is used to prevent name collisions when declaring tiles + * + * @return The IdSpace id + */ + int32_t IdSpace() const + { + return _IdSpace; + } + /** + * @brief Gets all the IdSpace declarations defined in the tile registry. + * + * @return all the IdSpace declarations defined in the tile registry as std::vector. It returns an empty vector if there are no IdSpace declarations. + */ + std::vector IdSpace_declarations() const + { + std::vector x; + + auto it = _frags.begin(); + + while (it != _frags.end()) + { + x.push_back(it->first); + + it++; + } + + return x; + } + /** + * @brief Declare a tile from a previously created tile + */ + void insert(const std::string& name, const IVectorTile *frag) + { + assert(_language == GpuTargetLanguage::OpenCL); + const int32_t key_IdSpace = _IdSpace; + const std::string key_var_name = name; + const std::string var_name = frag->name(); + TileInfo format = frag->format(); + + // First check whether a tile with the same name exists + IVectorTile *result = (*this)[key_var_name]; + assert(result == nullptr); + if(result == nullptr) + { + std::unique_ptr tile = std::make_unique(var_name, format); + + _frags[key_IdSpace][key_var_name].tile_object = std::move(tile); + _frags[key_IdSpace][key_var_name].registry_level = _registry_level; + + _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Link; + _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name; + _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace; + _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level; + } + } + /** + * @brief Declare a tile with TileInfo. The tile will be stored in the IdSpace set with @p set_IdSpace() + * + * @note The reference name used for declaring the tile should not be previously used in the IdSpace + * + * @param[in] name Reference name for the tile. 
The reference name can be used to retrieve the tile stored in the registry. + * @param[in] format Tile format use to use + */ + void insert(const std::string& name, const TileInfo& format) + { + assert(_language == GpuTargetLanguage::OpenCL); + const int32_t key_IdSpace = _IdSpace; + const std::string key_var_name = name; + const std::string var_name = generate_tile_name(name); + + // First check whether a tile with the same name exists + IVectorTile *result = (*this)[key_var_name]; + assert(result == nullptr); + if(result == nullptr) + { + std::unique_ptr tile = std::make_unique(var_name, format); + _frags[key_IdSpace][key_var_name].tile_object = std::move(tile); + _frags[key_IdSpace][key_var_name].registry_level = _registry_level; + + _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile; + _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name; + _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace; + _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level; + } + } + /** + * @brief Declare a constant tile. The content of the tile is passed as a vector of std::string + * + * @note The reference name used for declaring the tile should not be previously used in the IdSpace + * + * @param[in] name Reference name for the tile. The reference name can be used to retrieve the tile stored in the registry. + * @param[in] in A 3D std::vector of std::string. From the 3D std::vector we can know the dimensions for the tile + * @param[in] dt The data type for the elements stored in the 3D std::vector as std::string. It is user's responsibilty to ensure + * that the data type is aligned with the content of the std::string. + */ + void insert(const std::string& name, const std::vector>& in, DataType dt) + { + assert(_language == GpuTargetLanguage::OpenCL); + const int32_t key_IdSpace = _IdSpace; + const std::string key_var_name = name; + + // First check whether a tile with the same name exists + IVectorTile *result = (*this)[key_var_name]; + assert(result == nullptr); + if(result == nullptr) + { + std::unique_ptr tile = std::make_unique(in, dt); + _frags[key_IdSpace][key_var_name].tile_object = std::move(tile); + _frags[key_IdSpace][key_var_name].registry_level = _registry_level; + + _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile; + _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name; + _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace; + _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level; + } + } + /** + * @brief Declare an anonymous constant tile. The content of the tile is passed as a vector of std::string + * + * @note This method can be used to declare temporary tiles that need to be accessed only once. + * + * @param[in] in A 3D std::vector of std::string. From the 3D std::vector we can know the dimensions for the tile + * @param[in] dt The data type for the elements stored in the 3D std::vector as std::string. It is user responsibilty to ensure + * that the data type is aligned with what passed with the std::string. 
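+ * For example (illustrative only, mirroring the call made by OperandUnpacker further down in this file),
+ * insert({{{"1"}}}, DataType::Int32) declares an anonymous constant tile holding the single literal "1".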
+ * + * @return IVectorTile* the anonymous constant tile + */ + IVectorTile* insert(const std::vector>& in, DataType dt) + { + assert(_language == GpuTargetLanguage::OpenCL); + const int32_t key_IdSpace = _IdSpace; + const std::string key_var_name = "_" + std::to_string(_anonymous_frag_count++); + + // First check whether a tile with the same name exists + IVectorTile *result = (*this)[key_var_name]; + assert(result == nullptr); + if(result == nullptr) + { + std::unique_ptr tile = std::make_unique(in, dt); + _frags[key_IdSpace][key_var_name].tile_object = std::move(tile); + _frags[key_IdSpace][key_var_name].registry_level = _registry_level; + + _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile; + _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name; + _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace; + _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level; + } + + return (*this)[key_var_name]; + } + /** + * @brief Get the tile from the registry. This method searches the tile in the IdSpace provided by the user + * + * @param[in] name The name of the tile to retrieve + * @param[in] IdSpace The IdSpace id where to search the tile + * + * @return IVectorTile* The tile + */ + IVectorTile* get(const std::string& name, int32_t IdSpace) + { + const int32_t key_IdSpace = IdSpace; + const std::string key_var_name = name; + + IVectorTile* result = nullptr; + auto search_IdSpace = _frags.find(key_IdSpace); + if(search_IdSpace != _frags.end()) + { + auto search_tile = _frags[key_IdSpace].find(key_var_name); + if(search_tile != _frags[key_IdSpace].end()) + { + result = search_tile->second.tile_object.get(); + assert(result != nullptr); + } + } + + return result; + } + /** + * @brief Get the tile from the registry. This method searches the tile in the IdSpace set with @p set_IdSpace() + * + * @param[in] name The name of the tile to retrieve + * + * @return IVectorTile* The tile + */ + IVectorTile* operator[](const std::string& name) + { + return get(name, _IdSpace); + } + /** + * @brief Check whether the tile in the in the IdSpace provided by the user exists + * + * @param[in] name Name of the tile to search for + * @param[in] IdSpace The IdSpace id where to search the tile + * + * @return true if the tile exists + * @return false if the tile does not exist + */ + bool has_tile(const std::string& name, int32_t IdSpace) const + { + const int32_t key_IdSpace = IdSpace; + const std::string key_var_name = name; + + // IVectorTile* result = nullptr; + auto search_IdSpace = _frags.find(key_IdSpace); + + return search_IdSpace != _frags.end(); + } + /** + * @brief Check whether the tile within the current IdSpace exists + * + * @param[in] name Name of the tile to search for + * + * @return true if the tile exists + * @return false if the tile does not exist + */ + bool has_tile(const std::string& name) const + { + return has_tile(name, _IdSpace); + } + /** + * @brief Get all the tiles declared within the IdSpace provided by the user + * + * @param[in] IdSpace IdSpace where to retrieve all the declared tiles + * + * @return std::vector A vector with all the declared tiles in the IdSpace provided by the user + */ + std::vector tile_declarations(int32_t IdSpace) + { + std::vector tiles; + + std::map::iterator it = _frag_types[IdSpace].begin(); + + while (it != _frag_types[IdSpace].end()) + { + // The following line should be enabled. However, we cannot at this stage + // because it used to retrieve the output tile produced by each component. 
+ // However, this method should NOT be used to retrieve the output tile + //if(it->second.tile_type == RegistryTileType::Tile) + { + tiles.push_back(get(it->second.tile_name, it->second.registry_idspace)); + } + it++; + } + + return tiles; + } + /** + * @brief Increase the level of stack. + * + */ + void increment_registry_level() + { + _registry_level++; + } + /** + * @brief Remove all the tiles declared at the current stack level and decrease the level of the stack. + * + */ + void decrement_registry_level() + { + assert(_registry_level >= 0); + + // Remove all variables in the local scope + std::map::iterator it = _frags[_IdSpace].begin(); + + while (it != _frags[_IdSpace].end()) + { + if (it->second.registry_level == _registry_level) + { + it = _frags[_IdSpace].erase(it); + } + else + { + it++; + } + } + + std::map::iterator it_type = _frag_types[_IdSpace].begin(); + + while (it_type != _frag_types[_IdSpace].end()) + { + if (it_type->second.registry_level == _registry_level) + { + it_type = _frag_types[_IdSpace].erase(it_type); + } + else + { + it_type++; + } + } + + _registry_level--; + } + /** + * @brief Get the level of the stack + * + */ + int32_t level() const + { + return _registry_level; + } + +private: + // This method ensures that the key is unique among different components + std::string generate_tile_name(const std::string& name) + { + assert(_IdSpace >= 0 ); + if(_registry_level == 0) + { + return "_G" + std::to_string(_IdSpace) + "_" + name; + } + else + { + return name; + } + } + RegistryTileTable _frags {}; + RegistryTileTypeTable _frag_types {}; + RegistryLevel _registry_level { 0 }; + RegistryIdSpace _IdSpace { -1 }; + int32_t _anonymous_frag_count { 0 }; // Counter used to create the anonymous tiles + GpuTargetLanguage _language { GpuTargetLanguage::Unknown }; // Gpu programming language +}; + +using TensorEntry = std::unique_ptr; + +/** + * @brief Data structure that contains the tensors consumed by the components. + * Since different components may use the same name as reference for a tensor, the registry adopts the IdSpace concept, an @p id to prevent name collisions + * when declaring tensors among different components. + * + */ +class GpuTensorArgumentRegistry +{ +public: + /** + * @brief Construct a new Gpu Tensor Registry object + * + */ + GpuTensorArgumentRegistry() + { + _language = GpuTargetLanguage::Unknown; + } + /** + * @brief Construct a new Gpu Tensor Registry object + * + * @param[in] language Gpu programming language to use + */ + GpuTensorArgumentRegistry(GpuTargetLanguage language) + { + _language = language; + } + /** + * @brief Default destructor. Destroy the Gpu Tensor Registry object + * + */ + ~GpuTensorArgumentRegistry() = default; + /** + * @brief Set the working IdSpace for the tensor registry. IdSpace is used to prevent name collisions when declaring tensors. + * Therefore, the IdSpace should be set before declaring any tensors. + * + * @param[in] id The IdSpace id + */ + void set_IdSpace(int32_t id) + { + _IdSpace = id; + } + /** + * @brief Get the current working IdSpace for the tensor registry. IdSpace is used to prevent name collisions when declaring tensors + * + * @return The IdSpace id + */ + int32_t IdSpace() const + { + return _IdSpace; + } + /** + * @brief Gets all the IdSpace declarations defined in the tensor registry. + * + * @return all the IdSpace declarations defined in the tensor registry as std::vector. It returns an empty vector if there are no IdSpace declarations. 
+ */ + std::vector IdSpace_declarations() const + { + std::vector x; + + auto it = _refs.begin(); + + while (it != _refs.end()) + { + x.push_back(it->first); + + it++; + } + + return x; + } + /** + * @brief Declare a tensor with TensorInfo. The tensor will be stored in the IdSpace set with @p set_IdSpace() + * + * @note The reference name used for declaring the tensor should not be previously used in the IdSpace + * + * @param[in] name Reference name for the tensor. The reference name can be used to retrieve the tensor stored in the registry. + * @param[in] x Pair of tensor info and tensor id + * @param[in] return_by_value_when_possible True if we want the value stored in the tensor components + */ + void insert(const std::string& name, const TensorInfo& x, bool return_by_value_when_possible) + { + assert(_language == GpuTargetLanguage::OpenCL); + const int32_t key_IdSpace = _IdSpace; + const int32_t tensor_id = x.id; + const std::string key_var_name = name; + const std::string var_name = generate_tensor_name(name, tensor_id); + + // First, check whether the tensor has already a reference. If so, trigger an assert + assert(!has_tensor_argument(name)); + + // Check whether a tensor with that tensorID exists + auto result = _tensor_arguments.find(tensor_id); + if(result == _tensor_arguments.end()) + { + // It means that we haven't added a tensor with that tensor_id yet. Create a IGpuTensorArgument before creating the reference + std::unique_ptr arg = std::make_unique(var_name, x, return_by_value_when_possible); + _tensor_arguments[tensor_id] = std::move(arg); + } + + _refs[key_IdSpace][key_var_name] = tensor_id; + } + /** + * @brief Get the tensor from the registry. This method searches the tensor in the IdSpace set with @p set_IdSpace() + * + * @param[in] name The name of the tensor to retrieve + * + * @return IGpuTensor* The tensor + */ + IGpuTensorArgument* operator[](const std::string& name) + { + const int32_t key_IdSpace = _IdSpace; + const std::string key_var_name = name; + + IGpuTensorArgument* result = nullptr; + auto search_IdSpace = _refs.find(key_IdSpace); + if(search_IdSpace != _refs.end()) + { + auto search_tensor_id = _refs[key_IdSpace].find(key_var_name); + + if(search_tensor_id != _refs[key_IdSpace].end()) + { + const int32_t tensor_id = search_tensor_id->second; + auto search_tensor_argument = _tensor_arguments.find(tensor_id); + if(search_tensor_argument != _tensor_arguments.end()) + { + result = search_tensor_argument->second.get(); + } + assert(result != nullptr); + } + } + + return result; + } + /** + * @brief Get all the tensors declared in the IdSpace provided by the user + * + * @return std::vector A vector with all the declared tensors + */ + std::vector tensor_argument_declarations() + { + std::vector args; + + auto it = _tensor_arguments.begin(); + + while (it != _tensor_arguments.end()) + { + args.push_back(it->second.get()); + it++; + } + + return args; + } + /** + * @brief Check whether the tensor argument in the IdSpace set with @p set_IdSpace() exists + * + * @param[in] name Name of the tensor argument to search for + * + * @return true if the tensor argument exists + * @return false if the tensor argument does not exist + */ + bool has_tensor_argument(const std::string& name) + { + const int32_t key_IdSpace = _IdSpace; + const std::string key_var_name = name; + + auto search_IdSpace = _refs.find(key_IdSpace); + + if(search_IdSpace != _refs.end()) + { + auto search_tensor_id = _refs[key_IdSpace].find(key_var_name); + + return search_tensor_id != 
_refs[key_IdSpace].end(); + } + else + { + return false; + } + } + /** + * @brief Check whether the tensor argument is in the the IdSpace provided by the user + * + * @param[in] name Name of the tensor argument to search for + * @param[in] IdSpace The IdSpace id where to search the tensor argument + * + * @return true if the tile exists + * @return false if the tile does not exist + */ + bool has_tensor_argument(const std::string& name, int32_t IdSpace) + { + const int32_t key_IdSpace = IdSpace; + const std::string key_var_name = name; + + auto search_IdSpace = _refs.find(key_IdSpace); + + if(search_IdSpace != _refs.end()) + { + auto search_tensor_id = _refs[key_IdSpace].find(key_var_name); + + return search_tensor_id != _refs[key_IdSpace].end(); + } + else + { + return false; + } + } +private: + // This method ensures that the key is unique among different components + std::string generate_tensor_name(const std::string& name, int32_t tensor_id) + { + assert(tensor_id >= 0 ); + + return name + std::to_string(tensor_id); + } + + std::map _tensor_arguments {}; + std::map> _refs {}; + int32_t _IdSpace { -1 }; + GpuTargetLanguage _language { GpuTargetLanguage::Unknown }; // Gpu programming language +}; + +enum class OpType : int32_t +{ + Elementwise = 0x0000, + Relational = 0x1000, + Algebra = 0x2000 +}; + +inline std::string to_string(AssignmentOp op) +{ + switch(op) + { + case AssignmentOp::Decrement: + return "-="; + case AssignmentOp::Increment: + return "+="; + default: + assert(false); + return ""; + } +} + +inline std::string to_string(BinaryOp op) +{ + switch(op) + { + case BinaryOp::Add: + return "+"; + case BinaryOp::Sub: + return "-"; + case BinaryOp::Mul: + return "*"; + case BinaryOp::Div: + return "/"; + case BinaryOp::Mod: + return "%"; + case BinaryOp::Equal: + return "=="; + case BinaryOp::Less: + return "<"; + case BinaryOp::LessEqual: + return "<="; + case BinaryOp::Greater: + return ">"; + case BinaryOp::GreaterEqual: + return ">="; + case BinaryOp::LogicalAnd: + return "&&"; + case BinaryOp::LogicalOr: + return "||"; + case BinaryOp::LogicalNot: + return "!"; + default: + assert(false); + return ""; + } +} + +inline std::string binary_op_string(BinaryOp op) +{ + switch(op) + { + case BinaryOp::Add: + return "add"; + case BinaryOp::Sub: + return "sub"; + case BinaryOp::Mul: + return "mul"; + case BinaryOp::Div: + return "div"; + case BinaryOp::Mod: + return "mod"; + case BinaryOp::Equal: + return "eq"; + case BinaryOp::Less: + return "gt"; + case BinaryOp::LessEqual: + return "gteq"; + case BinaryOp::Greater: + return "lt"; + case BinaryOp::GreaterEqual: + return "lte"; + default: + assert(false); + return ""; + } +} + +enum class OperandType : int32_t +{ + Unknown = 0x00000000, + ScalarFp32 = 0x00001011, // Immediate scalar tile + ScalarFp16 = 0x00001012, // Immediate scalar tile + ScalarInt32 = 0x00001021, // Immediate scalar tile + ScalarInt16 = 0x00001022, // Immediate scalar tile + ScalarInt8 = 0x00001024, // Immediate scalar tile + ScalarUInt32 = 0x00001031, // Immediate scalar tile + ScalarUInt16 = 0x00001032, // Immediate scalar tile + ScalarUInt8 = 0x00001034, // Immediate scalar tile + ScalarBool = 0x00001041, // Immediate scalar tile + ScalarTile = 0x00001050, // Scalar from a tile + Tile = 0x00010000, // Tile + TensorStride1 = 0x00100001, // Tensor component + TensorStride2 = 0x00100002, // Tensor component + TensorStride3 = 0x00100003, // Tensor component + TensorStride4 = 0x00100004, // Tensor component + TensorDim0 = 0x00100010, // Tensor component + 
TensorDim1 = 0x00100020, // Tensor component + TensorDim2 = 0x00100030, // Tensor component + TensorDim3 = 0x00100040, // Tensor component + TensorDim4 = 0x00100050, // Tensor component + TensorC = 0x00100010, // Tensor component + TensorW = 0x00100020, // Tensor component + TensorH = 0x00100030, // Tensor component + TensorD = 0x00100040, // Tensor component + TensorN = 0x00100050, // Tensor component + TensorDim1xDim2 = 0x00100100, // Tensor component + TensorDim1xDim2xDim3 = 0x00100200, // Tensor component + TensorWxH = 0x00100300, // Tensor component + TensorWxHxD = 0x00100400, // Tensor component + TensorDataOffset = 0x00100500, // Tensor component +}; + +struct ScalarTileCoord +{ + ScalarTileCoord() {} + ScalarTileCoord(int32_t x0, int32_t y0) : x(x0), y(y0) {} + int32_t x { -1 }; + int32_t y { -1 }; +}; +/** + * @brief Operand class. This object is used to pass the operands to the operations performed by the writer. + * Operand can be of three types: + * -# Scalar immediate: constant expression + * -# Tile: A tile + * -# Tensor component: A component (scalar) of a tensor + * + */ +class Operand +{ +public: + Operand(const std::string &val) + { + _str = val; + _type = OperandType::Tile; + } + + Operand(const std::string &val, const ScalarTileCoord& coord) + { + _str = val; + _type = OperandType::ScalarTile; + _coord = coord; + } + + Operand(const std::string &val, OperandType type) + { + _str = val; + _type = type; + } + + Operand(const Operand& t) + { + _str = t.value(); + _type = t.type(); + } + + Operand& operator=(const Operand& t) + { + _str = t.value(); + _type = t.type(); + _coord = t.scalar_tile_coordinate(); + return *this; + } + + std::string value() const + { + return _str; + } + + OperandType type() const + { + return _type; + } + + ScalarTileCoord scalar_tile_coordinate() const + { + return _coord; + } + +private: + std::string _str {}; + OperandType _type { OperandType::Unknown }; + ScalarTileCoord _coord {}; +}; + +enum class GpuSamplerTensorStorage : int32_t +{ + Unknown = static_cast(GpuTensorStorage::Unknown), + BufferUint8Ptr = static_cast(GpuTensorStorage::BufferUint8Ptr), + Image2dReadOnly = static_cast(GpuTensorStorage::Image2dReadOnly), + Image2dWriteOnly = static_cast(GpuTensorStorage::Image2dWriteOnly), + Image3dReadOnly = static_cast(GpuTensorStorage::Image3dReadOnly), + Image3dWriteOnly = static_cast(GpuTensorStorage::Image2dWriteOnly), +}; + +struct GpuSampler +{ + GpuSampler() = default; + TensorSamplerFormat format { TensorSamplerFormat::Unknown }; + GpuSamplerTensorStorage storage { GpuSamplerTensorStorage::Unknown }; + TensorSamplerAddressModeX address_mode_x { TensorSamplerAddressModeX::Unknown }; + TensorSamplerAddressModeY address_mode_y { TensorSamplerAddressModeY::Unknown }; + TensorSamplerAddressModeZ address_mode_z { TensorSamplerAddressModeZ::Unknown }; +}; + +inline GpuSampler create_simple_sampler(const TensorInfo* tensor_info_id, GpuSampler sampler, int32_t step_x, int32_t step_y, int32_t step_z) +{ + CKW_UNUSED(step_x, step_y, step_z); + + auto tensor = tensor_info_id->shape; + + GpuSampler dst_sampler; + dst_sampler.format = sampler.format; + dst_sampler.storage = GpuSamplerTensorStorage::BufferUint8Ptr; + dst_sampler.address_mode_x = sampler.address_mode_x; + dst_sampler.address_mode_y = sampler.address_mode_y; + dst_sampler.address_mode_z = sampler.address_mode_z; + + int32_t dim_x = 0; + int32_t dim_y = 0; + int32_t dim_z = 0; + + switch(sampler.format) + { + case TensorSamplerFormat::C_W_H: + dim_x = tensor[0]; + dim_y = tensor[1]; + 
dim_z = tensor[2]; + break; + case TensorSamplerFormat::C_WH_1: + dim_x = tensor[0]; + dim_y = tensor[1] * tensor[2]; + dim_z = 1; + break; + default: + std::cout << "Unsupported tensor format" << std::endl; + assert(false); + break; + } + + if(dim_x == 1) + { + assert(step_x == 1); + dst_sampler.address_mode_x = TensorSamplerAddressModeX::None; + } + + if(dim_y == 1) + { + assert(step_y == 1); + dst_sampler.address_mode_y = TensorSamplerAddressModeY::None; + } + + if(dim_z == 1) + { + assert(step_z == 1); + dst_sampler.address_mode_z = TensorSamplerAddressModeZ::None; + } + + return dst_sampler; +} + +class GpuOutputSampler +{ +public: + GpuOutputSampler() = default; + /** + * @brief Method used to initialize the GpuOutputSampler. The GpuOutputSampler can be initialized only once + * by the root component. Once initialized, all simpler components will need to used this sampler + * or a broadcasted version of it + * + * @param[in] sampler GpuSampler + * @param[in] step_x Increment step in the X direction. Not necessarily it is the same of n0 of tile! + * @param[in] step_y Increment step in the Y direction. Not necessarily it is the same of m0 of tile! + * @param[in] step_z Increment step in the Z direction. Not necessarily it is the same of d0 of tile! + */ + void initialize(const TensorInfo *tensor_info_id, GpuSamplerTensorStorage tensor_storage, TensorSamplerFormat tensor_format, int32_t step_x, int32_t step_y, int32_t step_z) + { + assert(_is_initialized == false); + + _step_x = step_x; + _step_y = step_y; + _step_z = step_z; + _tensor_info_id = tensor_info_id; + _sampler = create_sampler(tensor_storage, tensor_format); + _is_initialized = true; + }; + + GpuSampler sampler() const + { + return _sampler; + }; + + int32_t step_x() const + { + return _step_x; + }; + + int32_t step_y() const + { + return _step_y; + }; + + int32_t step_z() const + { + return _step_z; + }; +private: + GpuSampler create_sampler(GpuSamplerTensorStorage tensor_storage, TensorSamplerFormat tensor_format) + { + // Output can only be in output mode + assert(tensor_storage != GpuSamplerTensorStorage::Image2dReadOnly); + assert(tensor_storage != GpuSamplerTensorStorage::Image3dReadOnly); + + auto tensor = _tensor_info_id->shape; + + GpuSampler sampler; + sampler.format = tensor_format; + sampler.storage = tensor_storage; + sampler.address_mode_x = TensorSamplerAddressModeX::None; + sampler.address_mode_y = TensorSamplerAddressModeY::None; + sampler.address_mode_z = TensorSamplerAddressModeZ::None; + + // In the case of texture, we do not need any special checks at the border + if(tensor_storage == GpuSamplerTensorStorage::BufferUint8Ptr) + { + int32_t dim_x = 0; + int32_t dim_y = 0; + int32_t dim_z = 0; + + switch(tensor_format) + { + case TensorSamplerFormat::C_W_H: + dim_x = tensor[0]; + dim_y = tensor[1]; + dim_z = tensor[2]; + break; + case TensorSamplerFormat::C_WH_1: + dim_x = tensor[0]; + dim_y = tensor[1] * tensor[2]; + dim_z = 1; + break; + default: + std::cout << "Unsupported tensor format" << std::endl; + assert(false); + break; + } + + if((dim_x % _step_x) != 0 && dim_x != 1) + { + sampler.address_mode_x = TensorSamplerAddressModeX::OverlappingMin; + } + + if((dim_y % _step_y) != 0 && dim_y != 1) + { + sampler.address_mode_y = TensorSamplerAddressModeY::ClampToMaxEdgeOnly; + } + + if((dim_z % _step_z) != 0 && dim_z != 1) + { + sampler.address_mode_z = TensorSamplerAddressModeZ::ClampToMaxEdgeOnly; + } + } + + return sampler; + } + GpuSampler _sampler { }; // GpuSampler + int32_t _step_x { 1 }; + int32_t 
_step_y { 1 }; + int32_t _step_z { 1 }; + const TensorInfo* _tensor_info_id { nullptr }; + bool _is_initialized { false }; +}; + +/** + * @brief Tensor operand class. This object is used to pass the operands as tensor to the operations performed by the writer. + */ +class TensorOperand +{ +public: + TensorOperand(const std::string &val, GpuSampler sampler) : _str(val), _sampler(sampler) + { + } + + TensorOperand& operator=(const TensorOperand& t) + { + _str = t.value(); + _sampler = t.sampler(); + return *this; + } + + std::string value() const + { + return _str; + } + + GpuSampler sampler() const + { + return _sampler; + } + +private: + std::string _str {}; + GpuSampler _sampler {}; +}; + +/** + * @brief Data structure that contains all the necessary information to write the Gpu kernel with the Gpu kernel Writer + * This data structure must be initialized before being passed to the Gpu Kernel Writer + * + */ +class GpuKernelWriterDataHolder +{ +public: + /** + * @brief Construct a new Gpu Kernel Data object. In this phase, we should also store + * the GPU target and target specific capabilities (extensions). For now, we just initialize the + * programming language + * + * @param[in] language Gpu programming language to use + */ + GpuKernelWriterDataHolder(GpuTargetLanguage language) : tiles(language), arguments(language), code(""), _language(language) + { + } + /** + * @brief Get the Gpu programming language used + * + * @return GpuTargetLanguage the Gpu programming language + */ + GpuTargetLanguage programming_language() const + { + return _language; + } + /** + * @brief @ref GpuTileRegistry + * + */ + GpuTileRegistry tiles{}; + /** + * @brief @ref GpuTensorArgumentRegistry + * + */ + GpuTensorArgumentRegistry arguments{}; + /** + * @brief @ref GpuOutputSampler. + * + */ + GpuOutputSampler output_sampler{}; + /** + * @brief Source code + * + */ + std::string code{}; + + // GpuExtensionRegistry extensions{}; +private: + GpuTargetLanguage _language; +}; + +struct LWS +{ + int32_t x {1}; + int32_t y {1}; + int32_t z {1}; +}; + +/** + * @brief Utility class used to get the tile from the operand. If the operand is not a tile, @ref OperandUnpacker + * declare an anonymous tile in the tile registry. 
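+ * A minimal usage sketch (writer_data is a hypothetical GpuKernelWriterDataHolder; it mirrors how
+ * ClKernelWriter uses this helper below):
+ * @code
+ * OperandUnpacker operands(writer_data.tiles, writer_data.arguments);
+ * IVectorTile *dst = operands.unpack(Operand("dst"));                          // resolves an existing tile
+ * IVectorTile *one = operands.unpack(Operand("1", OperandType::ScalarInt32));  // anonymous constant tile
+ * @endcode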
+ */ +class OperandUnpacker +{ +public: + OperandUnpacker(GpuTileRegistry& tiles, GpuTensorArgumentRegistry& arguments) : _tiles(tiles), _arguments(arguments) + { + // Increase the level of the stack to allocate possible temporary tiles + _tiles.increment_registry_level(); + }; + + ~OperandUnpacker() + { + // Decrease the level of the stack to deallocate any temporary tiles + _tiles.decrement_registry_level(); + } + + IVectorTile* unpack(const Operand& src) + { + // Get the tile + if(src.type() == OperandType::Tile) + { + assert(_tiles.has_tile(src.value())); + return _tiles[src.value()]; + } + // Create an anonymous tile with a constant + else if(static_cast(src.type()) & 0x00001000) + { + if(src.type() == OperandType::ScalarTile) + { + ScalarTileCoord coord = src.scalar_tile_coordinate(); + assert(_tiles.has_tile(src.value())); + assert(coord.x >= 0); + assert(coord.y >= 0); + auto val = _tiles[src.value()]->scalar(coord.x, coord.y); + return _tiles.insert({{{val.str}}}, val.type.dt); + } + else + { + return _tiles.insert({{{src.value()}}}, to_tile_data_type(src.type())); + } + } + // Create an anonymous tile with the tensor component + else + { + assert(_arguments.has_tensor_argument(src.value())); + auto x = _arguments[src.value()]; + const std::string val = x->component(to_tensor_component(src.type())); + const DataType dt = x->component_data_type(); + return _tiles.insert({{{val}}}, dt); + } + } + +private: + DataType to_tile_data_type(OperandType x) + { + return static_cast(static_cast(x) & 0x00ff); + } + + TensorComponent to_tensor_component(OperandType x) + { + switch(x) + { + case OperandType::TensorDim0: + return TensorComponent::Dim0; + case OperandType::TensorDim1: + return TensorComponent::Dim1; + case OperandType::TensorDim2: + return TensorComponent::Dim2; + case OperandType::TensorDim3: + return TensorComponent::Dim3; + case OperandType::TensorDim4: + return TensorComponent::Dim4; + case OperandType::TensorStride1: + return TensorComponent::Stride1; + case OperandType::TensorStride2: + return TensorComponent::Stride2; + case OperandType::TensorStride3: + return TensorComponent::Stride3; + case OperandType::TensorStride4: + return TensorComponent::Stride4; + case OperandType::TensorDim1xDim2: + return TensorComponent::Dim1xDim2; + case OperandType::TensorDim1xDim2xDim3: + return TensorComponent::Dim1xDim2xDim3; + case OperandType::TensorDataOffset: + return TensorComponent::OffsetFirstElement; + default: + assert(false); + return TensorComponent::Unknown; + } + } + + GpuTileRegistry& _tiles; + GpuTensorArgumentRegistry& _arguments; +}; + +/** + * @brief Utility class used to get the tensor argument from the operand. If the operand is not a tile, @ref OperandUnpacker + * declare an anonymous tile in the tile registry. + * Tensor dimension reduction aims for reducing the tensor data dimension while keeping data's tensor structure. 
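+ * In practice this helper only resolves the TensorOperand reference name to its IGpuTensorArgument through
+ * the tensor argument registry (see unpack() below); unlike OperandUnpacker, it does not create any tile.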
+ */ +class TensorOperandUnpacker +{ +public: + TensorOperandUnpacker(GpuTensorArgumentRegistry& arguments) : _arguments(arguments) + { + }; + + IGpuTensorArgument* unpack(const TensorOperand& src) + { + assert(_arguments.has_tensor_argument(src.value())); + return _arguments[src.value()]; + } + +private: + GpuTensorArgumentRegistry& _arguments; +}; + +/** + * @brief The GpuKernel will be used in three occasions (stages): + * #- Compilation stage + * #- Tuning stage + * #- Dispatch stage + */ +struct GpuKernel +{ + // Compilation stage + std::string code {}; // Source code, required for the compilation stage + std::vector list_extensions{}; // Extensions, required for the compilation stage + // Tuning stage + std::string config_id {}; // Unique id, required for the tuning stage + std::vector list_lws{}; // LWS to test, required for the tuning stage + // Dispatch stage + GpuOutputSampler output_sampler{}; // GpuOutputSampler, required for the dispatch stage + std::vector> list_tensor_storages; // List of tensor storages, required for the dispatch stage + std::vector> list_tensor_components;// List of tensor components (width, stride,..), required for the dispatch stage) +}; + +// This function should produce an object with the source +inline std::string generate_code(GpuKernelWriterDataHolder &in, const std::string& name) +{ + std::string code; + code += "__kernel void "; + code += name; + code += "(\n"; + + auto IdSpaces = in.arguments.IdSpace_declarations(); + + std::vector arg_str; + + auto tensor_args = in.arguments.tensor_argument_declarations(); + + for(auto &i : tensor_args) + { + // For each tensor used, get the storage and tensor components + auto storages = i->storage_declarations(); + auto components = i->component_declarations(); + + for(auto &y : storages) + { + std::string str; + str += i->storage_type_declaration(y); + str += " "; + str += i->storage(y); + arg_str.push_back(str); + } + + for(auto &y : components) + { + std::string str; + str += i->component_type_declaration(); + str += " "; + str += i->component(y); + arg_str.push_back(str); + } + } + + for(size_t i = 0; i < arg_str.size(); ++i) + { + code += arg_str[i]; + if(i + 1 < arg_str.size()) + { + code += ",\n"; + } + } + + code += ")\n"; + code += "{\n"; + code += in.code; + code += "}\n"; + + return code; +} + +/** + * @brief This class is responsible to map a N-Tensor to a 3d tensor. 
The mapper needs the GpuSampler to know + * how to reduce the dimensionality of a tensor + * + */ +class GpuTensor3dMapper +{ +public: + GpuTensor3dMapper(IGpuTensorArgument* tensor, GpuSampler sampler) : _sampler(sampler), _tensor(tensor) + { + }; + + std::string tensor_component_x() const + { + const auto format = _sampler.format; + switch(format) + { + case TensorSamplerFormat::C_WH_1: + case TensorSamplerFormat::C_W_H: + return _tensor->component(TensorComponent::C); + default: + std::cout << "Unsupported tensor format" << std::endl; + assert(false); + return ""; + } + } + + std::string tensor_component_y() const + { + const auto format = _sampler.format; + switch(format) + { + case TensorSamplerFormat::C_WH_1: + return _tensor->component(TensorComponent::WxH); + case TensorSamplerFormat::C_W_H: + return _tensor->component(TensorComponent::W); + default: + std::cout << "Unsupported tensor format" << std::endl; + assert(false); + return ""; + } + } + + std::string tensor_component_z() const + { + const auto format = _sampler.format; + switch(format) + { + case TensorSamplerFormat::C_WH_1: + return "1"; + case TensorSamplerFormat::C_W_H: + return _tensor->component(TensorComponent::H); + default: + std::cout << "Unsupported tensor format" << std::endl; + assert(false); + return ""; + } + } + + std::string tensor_component_stride_y() const + { + const auto format = _sampler.format; + switch(format) + { + case TensorSamplerFormat::C_WH_1: + case TensorSamplerFormat::C_W_H: + return _tensor->component(TensorComponent::Stride1); + default: + std::cout << "Unsupported tensor format" << std::endl; + assert(false); + return ""; + } + } + + std::string tensor_component_stride_z() const + { + const auto format = _sampler.format; + switch(format) + { + case TensorSamplerFormat::C_WH_1: + return "0"; + case TensorSamplerFormat::C_W_H: + return _tensor->component(TensorComponent::Stride2); + default: + std::cout << "Unsupported tensor format" << std::endl; + assert(false); + return ""; + } + } + + std::string tensor_component_stride_batch() const + { + const auto format = _sampler.format; + switch(format) + { + case TensorSamplerFormat::C_WH_1: + case TensorSamplerFormat::C_W_H: + return _tensor->component(TensorComponent::Stride3); + default: + std::cout << "Unsupported tensor format" << std::endl; + assert(false); + return ""; + } + } + + bool is_one_component_x() const + { + auto t = _tensor->format(); + const auto format = _sampler.format; + switch(format) + { + case TensorSamplerFormat::C_WH_1: + case TensorSamplerFormat::C_W_H: + return t.shape[0] == 1; + default: + std::cout << "Unsupported tensor format" << std::endl; + assert(false); + return ""; + } + } + + bool is_one_component_y() const + { + auto t = _tensor->format(); + const auto format = _sampler.format; + switch(format) + { + case TensorSamplerFormat::C_WH_1: + return (t.shape[1] * t.shape[2]) == 1; + case TensorSamplerFormat::C_W_H: + return t.shape[1] == 1; + default: + std::cout << "Unsupported tensor format" << std::endl; + assert(false); + return ""; + } + } + + bool is_one_component_z() const + { + auto t = _tensor->format(); + const auto format = _sampler.format; + switch(format) + { + case TensorSamplerFormat::C_WH_1: + return true; + case TensorSamplerFormat::C_W_H: + return t.shape[2] == 1; + default: + std::cout << "Unsupported tensor format" << std::endl; + assert(false); + return ""; + } + } + + bool is_one_component_batch() const + { + auto t = _tensor->format(); + const auto format = _sampler.format; + switch(format) 
+ { + case TensorSamplerFormat::C_WH_1: + case TensorSamplerFormat::C_W_H: + return t.shape[3] == 1; + default: + std::cout << "Unsupported tensor format" << std::endl; + assert(false); + return ""; + } + } + + GpuSampler gpu_sampler() const + { + return _sampler; + } + + IGpuTensorArgument* tensor_argument() const + { + return _tensor; + } + +private: + GpuSampler _sampler; + IGpuTensorArgument* _tensor; +}; + +struct GpuKernelWriterAttribute +{ + bool return_tensor_component_by_value { false }; +}; + +enum class ConvertPolicy +{ + Wrap, /**< Wrap around */ + Saturate /**< Saturate */ +}; + +enum class RoundingMode +{ + None, + Rte, + Rtz, + Rtp, + Rtn +}; + +// https://llvm.org/docs/tutorial/MyFirstLanguageFrontend/LangImpl05.html +class IGpuKernelWriter +{ +public: + virtual ~IGpuKernelWriter() = default; + virtual void set_IdSpace(int32_t id) = 0; + virtual void import_tile(const std::string& dst, const IVectorTile *src) = 0; + virtual void declare_argument(const std::string& name, const TensorInfo& tensor) = 0; + virtual void declare_tile(const std::string& name, const TileInfo& info) = 0; + virtual void declare_const_tile(const std::string& name, const std::vector>& in, DataType dt) = 0; + virtual void write_text(const std::string& x) = 0; + virtual void compound_statement_begin() = 0; + virtual void compound_statement_end() = 0; + + // Operations + virtual void op_get_global_id(const Operand& dst_var, int32_t dim) = 0; + virtual void op_get_global_coord(const Operand& dst, const Operand& step, const TensorOperand& tensor, int32_t dim) = 0; + virtual void op_get_global_batch(const Operand& dst, const TensorOperand& tensor) = 0; + virtual void op_get_global_size(const Operand& dst_var, int32_t dim) = 0; + virtual void op_binary_expression(const Operand& dst, const Operand &lhs, BinaryOp op, const Operand &rhs) = 0; + virtual void op_assign(const Operand& dst_name, const Operand& src_name) = 0; + virtual void op_scalar_function(const Operand& dst_name, const Operand& src_name, ScalarUnaryFunction func) = 0; + virtual void op_if(const Operand& lhs, BinaryOp op, const Operand& rhs) = 0; + virtual void op_for_loop(const Operand& var_name, BinaryOp cond_op, const Operand& cond_value, AssignmentOp update_op, const Operand& update_value) = 0; + virtual void op_load_indirect(const TensorOperand& tensor, const Operand& dst, const Operand& x, const Operand& y_indirect, const Operand& z, const Operand& b = Operand("0", OperandType::ScalarInt32)) = 0; + virtual void op_load_immediate(const TensorOperand& tensor, const Operand& dst, const Operand& x, const Operand& y, const Operand& z, const Operand& b = Operand("0", OperandType::ScalarInt32), const Operand& dilation_y = Operand("1", OperandType::ScalarInt32)) = 0; + virtual void op_store_immediate(const TensorOperand& tensor, const Operand& src, const Operand& x, const Operand& y, const Operand& z, const Operand& b = Operand("0", OperandType::ScalarInt32)) = 0; + virtual void op_cast_expression(const Operand& dst, const Operand &src, ConvertPolicy policy) = 0; + virtual void op_return() = 0; + // virtual void op_else() = 0; + // virtual void op_elseif() = 0; + // Utils + // It is the process of converting + virtual void util_get_indirect_buffer(const Operand& dst, const TensorOperand& tensor, const Operand& x, const Operand& y, const Operand& x_off, const Operand& y_off) = 0; +}; + +enum class GpuLoadStoreType +{ + Load = 1, + Store = 2 +}; + +class IGpuLoadStoreHelperWriter +{ +public: + IGpuLoadStoreHelperWriter(IGpuKernelWriter *x, 
GpuTensor3dMapper mapper, GpuLoadStoreType type) : _writer(x), _mapper(mapper), _type(type) {} + IGpuLoadStoreHelperWriter(const IGpuLoadStoreHelperWriter &) = default; + IGpuLoadStoreHelperWriter &operator=(const IGpuLoadStoreHelperWriter &) = default; + virtual ~IGpuLoadStoreHelperWriter() = default; + virtual void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) = 0; + virtual void write(const std::pair& y) = 0; + virtual void finalize() = 0; +protected: + IGpuKernelWriter* _writer; + GpuTensor3dMapper _mapper; + GpuLoadStoreType _type; +}; + +class ClLoadStoreBufferHelperWriter : public IGpuLoadStoreHelperWriter +{ +public: + ClLoadStoreBufferHelperWriter(IGpuKernelWriter *x, const GpuTensor3dMapper& mapper, GpuLoadStoreType type) : IGpuLoadStoreHelperWriter(x, mapper, type) + { + } + + ClLoadStoreBufferHelperWriter(const ClLoadStoreBufferHelperWriter &) = default; + ClLoadStoreBufferHelperWriter &operator=(const ClLoadStoreBufferHelperWriter &) = default; + + static bool validate(IGpuKernelWriter *x, GpuTensor3dMapper mapper, GpuLoadStoreType type, IVectorTile *dst) + { + CKW_UNUSED(x, type, dst); + + if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::BufferUint8Ptr) + { + return false; + } + return true; + } + + void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) override + { + assert(validate(_writer, _mapper, _type, dst)); + + _dst = dst; + _ls_width_full = dst->format().w; + + _coord_x = x->scalar(0, 0).str; + _coord_z = z->scalar(0, 0).str; + _coord_b = b->scalar(0, 0).str; + _coord_orig_z = _coord_z; + + out_of_bound_initialize_x(_coord_x); + out_of_bound_initialize_z(_coord_z); + + /* + meaning of else: + - x: partial load/store + - y: no load/store operation + - z: no load/store operation + if(x) + { + if(z) + { + if(y) + { + // full load/store width + } + else + { + // no load/store + } + } + else + { + // no load/store + } + } + else + { + if(z) + { + if(y) + { + // partial load/store width + } + else + { + // no load/store + } + } + else + { + // no load/store + } + } + */ + } + + void write(const std::pair& y) override + { + int32_t idx_y = y.first; + std::string coord_y = y.second; + + // The only check required is on Y. 
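+ // The X and Z out-of-bound guards are opened once in initialize() (out_of_bound_initialize_x/z) and
+ // closed in finalize(), so each row written here only needs its own Y guard.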
+ out_of_bound_initialize_y(coord_y); + + const std::string dst = _dst->vector(idx_y).str; + const std::string address = to_ls_buffer_address(_coord_x, coord_y, _coord_z, _coord_b); + const std::string ls_buf = to_ls_buffer(_type, _ls_width_full, dst, address); + + _writer->write_text(ls_buf); + _writer->write_text(";\n"); + + out_of_bound_finalize_y(dst); + + // The left over load/store will be written in the finalize stage + if(_ls_width_part.size() != 0) + { + int32_t w = 0; + for(auto &p : _ls_width_part) + { + const std::string dst0 = _dst->vector(w, p, idx_y).str; + const std::string coord_x = _coord_x + " + " + std::to_string(w); + const std::string address = to_ls_buffer_address(coord_x, coord_y, _coord_z, _coord_b); + const std::string ls_buf0 = to_ls_buffer(_type, p, dst0, address); + _leftovers_x.push_back(std::make_pair(std::make_pair(dst0, coord_y), ls_buf0)); + + w += p; + } + } + } + + void finalize() override + { + out_of_bound_finalize_z(); + out_of_bound_finalize_x(); + } +private: + IVectorTile* _dst { nullptr }; + int32_t _ls_width_full { 0 }; + std::vector _ls_width_part { }; + std::vector, std::string>> _leftovers_x {}; + std::string _coord_x {}; + std::string _coord_z {}; + std::string _coord_orig_z {}; + std::string _coord_b {}; + + void out_of_bound_initialize_x(std::string& coord) + { + if(_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin) + { + auto tensor_format = _mapper.tensor_argument()->format(); + auto shape = tensor_format.shape; + + _ls_width_part = decompose_leftover_ls_vector_width(shape[0] % _ls_width_full); + if(_ls_width_part.size() != 0) + { + _writer->write_text("if(" + coord + " > 0)\n"); + _writer->compound_statement_begin(); + } + } + }; + + void out_of_bound_finalize_x() + { + if(_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin) + { + if(_ls_width_part.size() != 0) + { + _writer->compound_statement_end(); + _writer->write_text("else\n"); + _writer->compound_statement_begin(); + + out_of_bound_initialize_z(_coord_orig_z); + for(auto &i : _leftovers_x) + { + out_of_bound_initialize_y(i.first.second); + _writer->write_text(i.second); + _writer->write_text(";\n"); + out_of_bound_finalize_y(i.first.first); + } + out_of_bound_finalize_z(); + _writer->compound_statement_end(); + } + } + }; + + void out_of_bound_initialize_y(std::string& coord) + { + std::string max = ""; + + const auto address_mode_y = _mapper.gpu_sampler().address_mode_y; + + switch(address_mode_y) + { + case TensorSamplerAddressModeY::Skip: + case TensorSamplerAddressModeY::ClampToBorder: + // NOTE: This line should not be moved outside of the switch statement. + // The reason for that is because when we query the component, the component is marked as used + // and added to the list of arguments of the kernel. 
Since, not in all cases this component is required, + // we should request the component only when used + max = _mapper.tensor_component_y(); + _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n"); + _writer->compound_statement_begin(); + break; + case TensorSamplerAddressModeY::SkipMinEdgeOnly: + case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly: + _writer->write_text("if(" + coord + " >= 0)\n"); + _writer->compound_statement_begin(); + break; + case TensorSamplerAddressModeY::SkipMaxEdgeOnly: + case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly: + max = _mapper.tensor_component_y(); + _writer->write_text("if(" + coord + " < " + max + ")\n"); + _writer->compound_statement_begin(); + break; + case TensorSamplerAddressModeY::ClampToNearest: + max = _mapper.tensor_component_y(); + coord = "clamp(" + coord + ", 0, " + max + " - 1)"; + break; + case TensorSamplerAddressModeY::ClampToMaxEdgeOnly: + max = _mapper.tensor_component_y(); + coord = "min(" + coord + ", " + max + " - 1)"; + break; + case TensorSamplerAddressModeY::ClampToMinEdgeOnly: + coord = "max(" + coord + ", 0)"; + break; + case TensorSamplerAddressModeY::None: + break; + default: + std::cout << "Unsupported address mode for write_out_of_bound_check_yz" << std::endl; + assert(false); + } + }; + + void out_of_bound_finalize_y(const std::string& dst) + { + const auto address_mode_y = _mapper.gpu_sampler().address_mode_y; + + switch(address_mode_y) + { + case TensorSamplerAddressModeY::ClampToBorder: + case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly: + case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly: + case TensorSamplerAddressModeY::Skip: + case TensorSamplerAddressModeY::SkipMaxEdgeOnly: + case TensorSamplerAddressModeY::SkipMinEdgeOnly: + _writer->compound_statement_end(); + break; + + default: + assert(false); + } + + switch(address_mode_y) + { + case TensorSamplerAddressModeY::ClampToBorder: + case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly: + case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly: + _writer->write_text("else\n"); + _writer->compound_statement_begin(); + _writer->write_text(dst); + _writer->write_text(" = 0.0f;\n"); + _writer->compound_statement_end(); + break; + + default: + assert(false); + } + }; + + void out_of_bound_initialize_z(std::string& coord) + { + std::string max = ""; + + const auto address_mode_z = _mapper.gpu_sampler().address_mode_z; + + switch(address_mode_z) + { + case TensorSamplerAddressModeZ::Skip: + max = _mapper.tensor_component_z(); + _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n"); + _writer->compound_statement_begin(); + break; + case TensorSamplerAddressModeZ::SkipMinEdgeOnly: + _writer->write_text("if(" + coord + " >= 0)\n"); + _writer->compound_statement_begin(); + break; + case TensorSamplerAddressModeZ::SkipMaxEdgeOnly: + max = _mapper.tensor_component_z(); + _writer->write_text("if(" + coord + " < " + max + ")\n"); + _writer->compound_statement_begin(); + break; + case TensorSamplerAddressModeZ::ClampToNearest: + max = _mapper.tensor_component_z(); + coord = "clamp(" + coord + ", 0, " + max + " - 1)"; + break; + case TensorSamplerAddressModeZ::ClampToMaxEdgeOnly: + max = _mapper.tensor_component_z(); + coord = "min(" + coord + ", " + max + " - 1)"; + break; + case TensorSamplerAddressModeZ::ClampToMinEdgeOnly: + coord = "max(" + coord + ", 0)"; + break; + case TensorSamplerAddressModeZ::None: + break; + default: + std::cout << "Unsupported address mode for 
write_out_of_bound_check_yz" << std::endl; + assert(false); + } + }; + + void out_of_bound_finalize_z() + { + const auto address_mode_z = _mapper.gpu_sampler().address_mode_z; + + switch(address_mode_z) + { + case TensorSamplerAddressModeZ::Skip: + case TensorSamplerAddressModeZ::SkipMinEdgeOnly: + case TensorSamplerAddressModeZ::SkipMaxEdgeOnly: + _writer->compound_statement_end(); + break; + + default: + assert(false); + } + }; + + std::vector decompose_leftover_ls_vector_width(int32_t ls_leftover_vector_width) const + { + std::vector x; + + switch(ls_leftover_vector_width) + { + case 0: + break; + case 1: + case 2: + case 3: + case 4: + case 8: + case 16: + x.push_back(ls_leftover_vector_width); + break; + case 5: + x.push_back(4); + x.push_back(1); + break; + case 6: + x.push_back(4); + x.push_back(2); + break; + case 7: + x.push_back(4); + x.push_back(3); + break; + case 9: + x.push_back(8); + x.push_back(1); + break; + case 10: + x.push_back(8); + x.push_back(2); + break; + case 11: + x.push_back(8); + x.push_back(3); + break; + case 12: + x.push_back(8); + x.push_back(4); + break; + case 13: + x.push_back(8); + x.push_back(4); + x.push_back(1); + break; + case 14: + x.push_back(8); + x.push_back(4); + x.push_back(2); + break; + case 15: + x.push_back(8); + x.push_back(4); + x.push_back(3); + break; + + default: + assert(false); + } + return x; + } + + std::string to_ls_buffer(GpuLoadStoreType type, int32_t vector_width, const std::string& data, const std::string& address) + { + switch(type) + { + case GpuLoadStoreType::Load: + if(vector_width != 1) + { + return data + " = vload" + std::to_string(vector_width) + "(0, " + address + ")"; + } + else + { + return data + " = *(" + address + ")"; + } + break; + case GpuLoadStoreType::Store: + if(vector_width != 1) + { + return "vstore" + std::to_string(vector_width) + "(" + data + ", 0, " + address + ")"; + } + else + { + return "*(" + address + ") = " + data; + } + break; + default: + std::cout << "Unsupported GpuLoadStoreType" << std::endl; + assert(false); + return ""; + } + } + + std::string to_ls_buffer_address(const std::string& x, const std::string& y, const std::string& z, const std::string& b) const + { + auto tensor_storage = static_cast(_mapper.gpu_sampler().storage); + assert(tensor_storage == GpuTensorStorage::BufferUint8Ptr); + const std::string ptr_buf = _mapper.tensor_argument()->storage(tensor_storage); + const std::string dst_type = get_cl_data_type(_dst->format().dt, 1); + + std::string address; + address += "(__global "; + address += dst_type; + address += "*)("; + address += ptr_buf; + if(x != "0" && (_mapper.is_one_component_x() != true)) + { + address += " + ("; + address += x + ") * sizeof(" + dst_type + ")"; + } + if(y != "0" && (_mapper.is_one_component_y() != true)) + { + const std::string stride_y = _mapper.tensor_component_stride_y(); + address += " + ("; + address += y + ")"; + address += " * "; + address += stride_y; + } + if(z != "0" && (_mapper.is_one_component_z() != true)) + { + const std::string stride_z = _mapper.tensor_component_stride_z(); + address += " + ("; + address += z + ")"; + address += " * "; + address += stride_z; + } + if(b != "0" && (_mapper.is_one_component_batch() != true)) + { + const std::string stride_b = _mapper.tensor_component_stride_batch(); + address += " + ("; + address += b + ")"; + address += " * "; + address += stride_b; + } + address += ")"; + return address; + } +}; + +class ClLoadStoreImage2dHelperWriter : public IGpuLoadStoreHelperWriter +{ +public: + static bool 
validate(IGpuKernelWriter *x, const GpuTensor3dMapper& mapper, GpuLoadStoreType type, IVectorTile *dst) + { + CKW_UNUSED(x); + + if(dst->format().w != 4) + { + return false; + } + if(mapper.gpu_sampler().address_mode_x != TensorSamplerAddressModeX::None) + { + return false; + } + if(mapper.gpu_sampler().address_mode_z != TensorSamplerAddressModeZ::None) + { + return false; + } + if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dReadOnly && type == GpuLoadStoreType::Load) + { + return false; + } + if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dWriteOnly && type == GpuLoadStoreType::Store) + { + return false; + } + if((dst->format().dt != DataType::Fp32) && (dst->format().dt != DataType::Fp16)) + { + return false; + } + return true; + /* + - x: Only GpuSamplerAddressModeX::None is supported and vector length = 4 + - z: Only GpuSamplerAddressModeZ::None is supported + */ + } + ClLoadStoreImage2dHelperWriter(IGpuKernelWriter *x, const GpuTensor3dMapper& mapper, GpuLoadStoreType type) : IGpuLoadStoreHelperWriter(x, mapper, type) + { + } + + ClLoadStoreImage2dHelperWriter(const ClLoadStoreImage2dHelperWriter &) = default; + ClLoadStoreImage2dHelperWriter &operator=(const ClLoadStoreImage2dHelperWriter &) = default; + + void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) override + { + assert(validate(_writer, _mapper, _type, dst)); + + _dst = dst; + _ls_width_full = dst->format().w; + _coord_x = x->scalar(0, 0).str; + _coord_z = z->scalar(0, 0).str; + _coord_b = b->scalar(0, 0).str; + + /* + if(y) + { + // full load/store width + } + else + { + // no load/store + } + */ + } + + void write(const std::pair& y) override + { + int32_t idx_y = y.first; + std::string coord_y = y.second; + + // The only check required is on Y. 
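+ // validate() restricts this path to TensorSamplerAddressModeX/Z::None, so no X or Z guard is required;
+ // only the Y coordinate is range-checked per row.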
+ out_of_bound_initialize_y(coord_y); + + const std::string dst = _dst->vector(idx_y).str; + const std::string sampler = to_ls_image2d_sampler(); + const std::string coord = to_ls_image2d_coord(_coord_x, coord_y, _coord_z, _coord_b); + const std::string ls_buf = to_ls_image2d(_type, _ls_width_full, dst, sampler, coord); + + _writer->write_text(ls_buf); + _writer->write_text(";\n"); + + out_of_bound_finalize_y(dst); + } + + void finalize() override + { + } +private: + IVectorTile* _dst { nullptr }; + int32_t _ls_width_full { 0 }; + std::string _coord_x {}; + std::string _coord_z {}; + std::string _coord_b {}; + + void out_of_bound_initialize_y(std::string& coord) + { + std::string max = ""; + + const auto address_mode_y = _mapper.gpu_sampler().address_mode_y; + + switch(address_mode_y) + { + case TensorSamplerAddressModeY::Skip: + max = _mapper.tensor_component_y(); + _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n"); + _writer->compound_statement_begin(); + break; + case TensorSamplerAddressModeY::SkipMinEdgeOnly: + _writer->write_text("if(" + coord + " >= 0)\n"); + _writer->compound_statement_begin(); + break; + case TensorSamplerAddressModeY::SkipMaxEdgeOnly: + max = _mapper.tensor_component_y(); + _writer->write_text("if(" + coord + " < " + max + ")\n"); + _writer->compound_statement_begin(); + break; + case TensorSamplerAddressModeY::ClampToBorder: + case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly: + case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly: + case TensorSamplerAddressModeY::ClampToNearest: + case TensorSamplerAddressModeY::ClampToMaxEdgeOnly: + case TensorSamplerAddressModeY::ClampToMinEdgeOnly: + case TensorSamplerAddressModeY::None: + break; + default: + std::cout << "Unsupported address mode for write_out_of_bound_check_y" << std::endl; + assert(false); + } + }; + + void out_of_bound_finalize_y(const std::string& dst) + { + CKW_UNUSED(dst); + + const auto address_mode_y = _mapper.gpu_sampler().address_mode_y; + + switch(address_mode_y) + { + case TensorSamplerAddressModeY::Skip: + case TensorSamplerAddressModeY::SkipMinEdgeOnly: + case TensorSamplerAddressModeY::SkipMaxEdgeOnly: + _writer->compound_statement_end(); + break; + + default: + assert(false); + } + }; + + std::string to_ls_image2d(GpuLoadStoreType type, int32_t vector_width, const std::string& data, const std::string& sampler, const std::string& coord) + { + CKW_UNUSED(vector_width); + + auto tensor_storage = static_cast(_mapper.gpu_sampler().storage); + const std::string image2d_obj = _mapper.tensor_argument()->storage(tensor_storage); + // const DataType dt = _dst->format().dt; + const std::string post_fix = _dst->format().dt == DataType::Fp32? 
"f" : "h"; + + switch(type) + { + case GpuLoadStoreType::Load: + return data + " = read_image" + post_fix + "(" + image2d_obj + ", " + sampler + ", " + coord + ")"; + break; + case GpuLoadStoreType::Store: + return "write_image" + post_fix + "(" + image2d_obj + ", " + coord + ", " + data + ")"; + default: + assert(false); + std::cout << "Unsupported GpuLoadStoreType" << std::endl; + assert(false); + return ""; + } + } + + std::string to_ls_image2d_sampler() const + { + const auto address_mode_y = _mapper.gpu_sampler().address_mode_y; + + switch(address_mode_y) + { + case TensorSamplerAddressModeY::None: + return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST"; + case TensorSamplerAddressModeY::Skip: + case TensorSamplerAddressModeY::SkipMinEdgeOnly: + case TensorSamplerAddressModeY::SkipMaxEdgeOnly: + case TensorSamplerAddressModeY::ClampToBorder: + case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly: + case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly: + return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST"; + case TensorSamplerAddressModeY::ClampToNearest: + case TensorSamplerAddressModeY::ClampToMaxEdgeOnly: + case TensorSamplerAddressModeY::ClampToMinEdgeOnly: + return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST"; + default: + std::cout << "Unsupported address_mode_coord" << std::endl; + assert(false); + return ""; + } + } + + std::string to_ls_image2d_coord(const std::string& x, const std::string& y, const std::string& z, const std::string& b) const + { + std::string coord_x = "(" + x + ") >> 2"; + std::string coord_y = "("; + + if(y != "0" && (_mapper.is_one_component_y() != true)) + { + coord_y += y; + } + if(z != "0" && (_mapper.is_one_component_z() != true)) + { + const std::string dim = _mapper.tensor_component_y(); + coord_y += " + ("; + coord_y += z + ")"; + coord_y += " * "; + coord_y += dim; + } + if(b != "0" && (_mapper.is_one_component_batch() != true)) + { + const std::string dim0 = _mapper.tensor_component_y(); + const std::string dim1 = _mapper.tensor_component_z(); + coord_y += " + ("; + coord_y += b + ")"; + coord_y += " * "; + coord_y += dim0; + coord_y += " * "; + coord_y += dim1; + } + coord_y += ")"; + return "(int2)(" + coord_x + ", " + coord_y + ")"; + } +}; + +/** IGpuLoadStoreHelperWriter factory class */ +class ClLoadStoreHelperWriterFactory final +{ +public: + /** Static method to call the IGpuLoadStoreHelperWriter class accordingly with the tensor storage set in the mapper + * + * + * @return IGpuLoadStoreHelperWriter + */ + static std::unique_ptr create(IGpuKernelWriter *x, const GpuTensor3dMapper& mapper, GpuLoadStoreType type) + { + const auto tensor_storage = mapper.gpu_sampler().storage; + switch(tensor_storage) + { + case GpuSamplerTensorStorage::BufferUint8Ptr: + return std::make_unique(x, mapper, type); + case GpuSamplerTensorStorage::Image2dReadOnly: + case GpuSamplerTensorStorage::Image2dWriteOnly: + return std::make_unique(x, mapper, type); + default: + std::cout << "Unsupported Gpu tensor storage" << std::endl; + assert(false); + return nullptr; + } + } +}; + +// This utility method needs to go in utils.h +inline bool is_tile_scalar(IVectorTile* x) +{ + return x->format().w == 1 && x->format().h == 1; +} + +class ClKernelWriter : public IGpuKernelWriter +{ +public: + ClKernelWriter(GpuKernelWriterAttribute *attr, GpuKernelWriterDataHolder *x) + { + _data = x; + _attr = attr; + } + + ClKernelWriter(const ClKernelWriter &) = default; + ClKernelWriter 
&operator=(const ClKernelWriter &) = default; + + // A IdSpaced ID is a term used to describe a fragment that is registered in ICode to ensure + // there are no conflicts or ambiguity in the code + void set_IdSpace(int32_t id) override + { + _data->tiles.set_IdSpace(id); + _data->arguments.set_IdSpace(id); + } + + void import_tile(const std::string& dst_name, const IVectorTile *src) override + { + _data->tiles.insert(dst_name, src); + } + + void declare_argument(const std::string& name, const TensorInfo& tensor) override + { + assert(_data->arguments[name] == nullptr); + _data->arguments.insert(name, tensor, _attr->return_tensor_component_by_value); + } + + void declare_tile(const std::string& name, const TileInfo& format) override + { + assert(_data->tiles[name] == nullptr); + _data->tiles.insert(name, format); + + IVectorTile *x = _data->tiles[name]; + + for(auto &t : x->underlying_source_variables()) + { + _data->code += t.type.str + " " + t.str + ";\n"; + } + } + + void declare_const_tile(const std::string& name, const std::vector>& in, DataType dt) override + { + assert(_data->tiles[name] == nullptr); + _data->tiles.insert(name, in, dt); + // Note: A constant does not need to be declared in the code + } + + void write_text(const std::string& x) override + { + _data->code += x; + } + + void compound_statement_begin() override + { + _data->tiles.increment_registry_level(); + _data->code += "{\n"; + } + + void compound_statement_end() override + { + _data->tiles.decrement_registry_level(); + _data->code += "}\n"; + } + + void op_get_global_id(const Operand& dst_var, int32_t dim) override + { + assert(dst_var.type() == OperandType::Tile); + assert(_data->tiles.has_tile(dst_var.value())); + assert(_data->tiles[dst_var.value()]->format().w == 1 && + _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable + + auto var = _data->tiles[dst_var.value()]; + + _data->code += var->scalar(0, 0).str; + _data->code += " = get_global_id("; + _data->code += std::to_string(dim); + _data->code += ");\n"; + }; + + void op_get_global_coord(const Operand& o_dst, const Operand& o_step, const TensorOperand& o_tensor, int32_t dim) override + { + OperandUnpacker operands(_data->tiles, _data->arguments); + auto dst = operands.unpack(o_dst); + auto step = operands.unpack(o_step); + + // Validation: Check that x, y and z are scalar + + TensorOperandUnpacker tensor_operands(_data->arguments); + auto tensor = tensor_operands.unpack(o_tensor); + auto gpu_sampler = o_tensor.sampler(); + + GpuTensor3dMapper mapper(tensor, gpu_sampler); + + switch (dim) + { + case 0: + if(mapper.is_one_component_x()) + { + _data->code += dst->scalar(0, 0).str; + _data->code += " = 0;\n"; + } + else + { + if(mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin) + { + // Validation: Check: fixed tensor shape + // TO BE CHANGED + _data->code += dst->scalar(0, 0).str; + _data->code += " = get_global_id(0) * "; + _data->code += step->scalar(0, 0).str; + _data->code += ";\n"; + } + else + { + _data->code += dst->scalar(0, 0).str; + _data->code += " = get_global_id(0) * "; + _data->code += step->scalar(0, 0).str; + _data->code += ";\n"; + } + } + break; + case 1: + if(mapper.is_one_component_y()) + { + _data->code += dst->scalar(0, 0).str; + _data->code += " = 0;\n"; + } + else + { + if(mapper.gpu_sampler().address_mode_y == TensorSamplerAddressModeY::OverlappingMin) + { + + } + else + { + _data->code += dst->scalar(0, 0).str; + _data->code += " = get_global_id(1) * "; + _data->code += 
step->scalar(0, 0).str; + _data->code += ";\n"; + } + } + break; + case 2: + if(mapper.is_one_component_z()) + { + _data->code += dst->scalar(0, 0).str; + _data->code += " = 0;\n"; + } + else + { + _data->code += dst->scalar(0, 0).str; + _data->code += " = get_global_id(2) * "; + _data->code += step->scalar(0, 0).str; + _data->code += ";\n"; + } + break; + default: + break; + } + }; + + void op_get_global_batch(const Operand& o_dst, const TensorOperand& o_tensor) override + { + OperandUnpacker operands(_data->tiles, _data->arguments); + auto dst = operands.unpack(o_dst); + + TensorOperandUnpacker tensor_operands(_data->arguments); + auto tensor = tensor_operands.unpack(o_tensor); + auto gpu_sampler = o_tensor.sampler(); + + GpuTensor3dMapper mapper(tensor, gpu_sampler); + + if(mapper.is_one_component_batch()) + { + _data->code += dst->scalar(0, 0).str; + _data->code += " = 0;\n"; + } + else + { + std::cout << "Unsupported batched computation" << std::endl; + assert(false); + } + }; + + void op_get_global_size(const Operand& dst_var, int32_t dim) override + { + assert(dst_var.type() == OperandType::Tile); + assert(_data->tiles.has_tile(dst_var.value())); + assert(_data->tiles[dst_var.value()]->format().w == 1 && + _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable + + auto var = _data->tiles[dst_var.value()]; + + _data->code += var->scalar(0, 0).str; + _data->code += " = get_global_size("; + _data->code += std::to_string(dim); + _data->code += ");\n"; + } + + void op_binary_expression(const Operand& dst_name, const Operand& lhs_name, BinaryOp op, const Operand& rhs_name) override + { + OperandUnpacker operands(_data->tiles, _data->arguments); + auto lhs = operands.unpack(lhs_name); + auto rhs = operands.unpack(rhs_name); + auto dst = operands.unpack(dst_name); + + const int32_t dst_w = dst->format().w; + const int32_t dst_h = dst->format().h; + assert(lhs != nullptr); + const int32_t lhs_w = lhs->format().w; + const int32_t rhs_w = rhs->format().w; + + if(op == BinaryOp::MatMul_Nt_T) + { + assert((dst->format().dt == DataType::Fp32) || (dst->format().dt == DataType::Fp16)); + for(int32_t y = 0; y < dst_h; ++y) + { + for(int32_t x = 0; x < dst_w; ++x) + { + for(int32_t k = 0; k < lhs_w; ++k) + { + _data->code += dst->scalar(x, y).str; + _data->code += " = fma("; + _data->code += lhs->scalar(k, y).str; + _data->code += ", "; + _data->code += rhs->scalar(k, x).str; + _data->code += ", "; + _data->code += dst->scalar(x, y).str; + _data->code += ");\n"; + } + } + } + + return; + } + + bool broadcast_lhs_x = dst_w != 1 && lhs_w == 1; + bool broadcast_rhs_x = dst_w != 1 && rhs_w == 1; + + std::string lhs_prefix = broadcast_lhs_x? "(" + dst->underlying_source_variables()[0].type.str + ")" : ""; + std::string rhs_prefix = broadcast_rhs_x? 
"(" + dst->underlying_source_variables()[0].type.str + ")" : ""; + std::string op_str = to_string(op); + + // Broadcasting on Y is automatic + for(int32_t y = 0; y < dst_h; ++y) + { + _data->code += dst->vector(y).str; + _data->code += " = "; + _data->code += lhs_prefix + lhs->vector(y).str; + _data->code += " "; + _data->code += op_str; + _data->code += " "; + _data->code += rhs_prefix + rhs->vector(y).str; + _data->code += ";\n"; + } + }; + + void op_cast_expression(const Operand& o_dst, const Operand &o_src, ConvertPolicy policy) override + { + CKW_UNUSED(policy); + + OperandUnpacker operands(_data->tiles, _data->arguments); + auto src = operands.unpack(o_src); + auto dst = operands.unpack(o_dst); + + // const int32_t dst_w = dst->format().w; + const int32_t dst_h = dst->format().h; + const std::string dt = dst->scalar(0, 0).type.str; + + // Broadcasting on Y is automatic + for(int32_t y = 0; y < dst_h; ++y) + { + _data->code += dst->vector(y).str; + _data->code += " = convert_" + dt + "("; + _data->code += src->vector(y).str; + _data->code += ");\n"; + } + }; + + void op_assign(const Operand& dst_name, const Operand& src_name) override + { + OperandUnpacker operands(_data->tiles, _data->arguments); + auto src = operands.unpack(src_name); + auto dst = operands.unpack(dst_name); + + const int32_t dst_w = dst->format().w; + const int32_t dst_h = dst->format().h; + const int32_t src_w = src->format().w; + // const int32_t src_h = src->format().h; + const std::string dt = dst->scalar(0, 0).type.str; + + bool broadcast_src_x = dst_w != 1 && src_w == 1; + + std::string src_prefix = broadcast_src_x? "(" + dt + ")" : ""; + + // Broadcasting on Y is automatic + for(int32_t y = 0; y < dst_h; ++y) + { + _data->code += dst->vector(y).str; + _data->code += " = "; + _data->code += src_prefix + src->vector(y).str; + _data->code += ";\n"; + } + } + + void op_scalar_function(const Operand& dst_name, const Operand& src_name, ScalarUnaryFunction func) override + { + OperandUnpacker operands(_data->tiles, _data->arguments); + auto src = operands.unpack(src_name); + auto dst = operands.unpack(dst_name); + + const int32_t dst_w = dst->format().w; + const int32_t dst_h = dst->format().h; + const int32_t src_w = src->format().w; + // const int32_t src_h = src->format().h; + const std::string dt = dst->scalar(0, 0).type.str; + + bool broadcast_src_x = dst_w != 1 && src_w == 1; + + std::string src_prefix = broadcast_src_x? 
"(" + dt + ")" : ""; + + // Broadcasting on Y is automatic + for(int32_t y = 0; y < dst_h; ++y) + { + _data->code += dst->vector(y).str; + _data->code += " = "; + + switch(func) + { + case ScalarUnaryFunction::Exp: + _data->code += "exp("; + break; + + default: + CKW_ASSERT(false); + } + + _data->code += src_prefix + src->vector(y).str; + _data->code += ");\n"; + } + } + + void op_if(const Operand& o_lhs, BinaryOp op, const Operand& o_rhs) override + { + OperandUnpacker operands(_data->tiles, _data->arguments); + auto lhs = operands.unpack(o_lhs); + auto rhs = operands.unpack(o_rhs); + + assert(is_tile_scalar(lhs)); + assert(is_tile_scalar(rhs)); + + _data->code += "if("; + _data->code += lhs->scalar(0, 0).str; + _data->code += " "; + _data->code += to_string(op); + _data->code += " "; + _data->code += rhs->scalar(0, 0).str; + _data->code += ")\n"; + } + + void op_for_loop(const Operand& var_name, BinaryOp cond_op, const Operand& cond_value_name, AssignmentOp update_op, const Operand& update_value_name) override + { + OperandUnpacker operands(_data->tiles, _data->arguments); + auto var = operands.unpack(var_name); + auto cond_value = operands.unpack(cond_value_name); + auto update_value = operands.unpack(update_value_name); + + const int32_t dst_w = var->format().w; + const int32_t dst_h = var->format().h; + + // It must be a scalar variable + CKW_UNUSED(dst_w, dst_h); + assert(dst_w == 1); + assert(dst_h == 1); + + _data->code += "for(; " ; + _data->code += var->scalar(0, 0).str; + _data->code += " "; + _data->code += to_string(cond_op); + _data->code += " " + cond_value->scalar(0, 0).str + "; "; + _data->code += var->scalar(0, 0).str; + _data->code += " "; + _data->code += to_string(update_op); + _data->code += " " + update_value->scalar(0, 0).str + ")"; + _data->code += "\n"; + } + + void op_load_immediate(const TensorOperand& o_tensor, const Operand& o_dst, const Operand& o_x, const Operand& o_y, const Operand& o_z, const Operand& o_batch_idx, const Operand& dilation_y) override + { + OperandUnpacker operands(_data->tiles, _data->arguments); + auto dst = operands.unpack(o_dst); + auto x = operands.unpack(o_x); + auto y = operands.unpack(o_y); + auto z = operands.unpack(o_z); + auto dil_y = operands.unpack(dilation_y); + auto b = operands.unpack(o_batch_idx); + + TensorOperandUnpacker tensor_operands(_data->arguments); + auto tensor = tensor_operands.unpack(o_tensor); + auto gpu_sampler = o_tensor.sampler(); + + GpuTensor3dMapper mapper(tensor, gpu_sampler); + + auto load_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Load); + + // Initialize the constant part + load_writer->initialize(dst, x, z, b); + + for(int i = 0; i < dst->format().h; ++i) + { + std::string coord_y = y->scalar(0, 0).str + " + " + std::to_string(i); + if(dil_y->scalar(0, 0).str != "1") + { + coord_y += " * " + dil_y->scalar(0, 0).str; + } + load_writer->write(std::make_pair(i, coord_y)); + } + + load_writer->finalize(); + } + + void op_load_indirect(const TensorOperand& o_tensor, const Operand& o_dst, const Operand& o_x, const Operand& o_indirect_h, const Operand& o_z, const Operand& o_batch_idx) override + { + OperandUnpacker operands(_data->tiles, _data->arguments); + auto dst = operands.unpack(o_dst); + auto x = operands.unpack(o_x); + auto y_ind = operands.unpack(o_indirect_h); + auto z = operands.unpack(o_z); + auto b = operands.unpack(o_batch_idx); + + TensorOperandUnpacker tensor_operands(_data->arguments); + auto tensor = tensor_operands.unpack(o_tensor); + auto gpu_sampler = 
o_tensor.sampler(); + + GpuTensor3dMapper mapper(tensor, gpu_sampler); + + auto load_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Load); + + // Initialize the constant part + load_writer->initialize(dst, x, z, b); + + for(int i = 0; i < dst->format().h; ++i) + { + load_writer->write(std::make_pair(i, y_ind->scalar(0, i).str)); + } + + load_writer->finalize(); + } + + void op_store_immediate(const TensorOperand& tensor_name, const Operand& src_name, const Operand& x_name, const Operand& y_name, const Operand& z_name, const Operand& batch_index_name) override + { + OperandUnpacker operands(_data->tiles, _data->arguments); + auto src = operands.unpack(src_name); + auto x = operands.unpack(x_name); + auto y = operands.unpack(y_name); + auto z = operands.unpack(z_name); + auto b = operands.unpack(batch_index_name); + + TensorOperandUnpacker tensor_operands(_data->arguments); + auto tensor = tensor_operands.unpack(tensor_name); + auto gpu_sampler = tensor_name.sampler(); + + GpuTensor3dMapper mapper(tensor, gpu_sampler); + + auto store_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Store); + + // Initialize the constant part + store_writer->initialize(src, x, z, b); + + int32_t tile_h = src->format().h; + + for(int m0 = tile_h - 1; m0 >= 0; m0--) + { + store_writer->write(std::make_pair(m0, y->scalar(0, 0).str + " + " + std::to_string(m0))); + } + + store_writer->finalize(); + } + + void op_return() override + { + _data->code += "return;\n"; + } + + void util_get_indirect_buffer(const Operand& o_dst, const TensorOperand& o_tensor, const Operand& o_x, const Operand& o_y, const Operand& o_x_off, const Operand& o_y_off) override + { + OperandUnpacker operands(_data->tiles, _data->arguments); + auto dst = operands.unpack(o_dst); + auto x = operands.unpack(o_x); + auto y = operands.unpack(o_y); + auto x_off = operands.unpack(o_x_off); + auto y_off = operands.unpack(o_y_off); + + TensorOperandUnpacker tensor_operands(_data->arguments); + auto tensor = tensor_operands.unpack(o_tensor); + + assert(dst->format().w == 1); + assert(x->format().w == 1); + assert(y->format().w == 1); + assert(x_off->format().w == 1); + assert(y_off->format().w == 1); + assert(dst->format().dt == DataType::Int32); + assert(x->format().dt == DataType::Int32); + assert(y->format().dt == DataType::Int32); + assert(x_off->format().dt == DataType::Int32); + assert(y_off->format().dt == DataType::Int32); + + const std::string width = tensor->component(TensorComponent::W); + const std::string height = tensor->component(TensorComponent::H); + const std::string wxh = tensor->component(TensorComponent::WxH); + /* + int x_s; + int y_s; + x_s = (xi_0 + x_k); + y_s = (yi_0 + y_k); + mi_0 = x_s + y_s * width + b * widthxheight; + mi_0 = select(-1, mi_0, x_s >= 0); + mi_0 = select(-1, mi_0, y_s >= 0); + mi_0 = select(-1, mi_0, x_s < 128); + mi_0 = select(-1, mi_0, y_s < 128); + */ + compound_statement_begin(); + declare_tile("_x_s", TileInfo(DataType::Int32)); + declare_tile("_y_s", TileInfo(DataType::Int32)); + auto x_s = operands.unpack(Operand("_x_s")); + auto y_s = operands.unpack(Operand("_y_s")); + for(int i = 0; i < dst->format().h; ++i) + { + // x_s = (xi_0 + x_k); + // y_s = (yi_0 + y_k); + _data->code += x_s->scalar(0, i).str; + _data->code += " = ("; + _data->code += x->scalar(0, i).str; + _data->code += " + "; + _data->code += x_off->scalar(0, i).str; + _data->code += ");\n"; + _data->code += y_s->scalar(0, i).str; + _data->code += " = ("; + _data->code += 
y->scalar(0, i).str; + _data->code += " + "; + _data->code += y_off->scalar(0, i).str; + _data->code += ");\n"; + // mi_0 = x_s + y_s * width; + _data->code += dst->scalar(0, i).str; + _data->code += " = "; + _data->code += x_s->scalar(0, i).str; + _data->code += " + "; + _data->code += y_s->scalar(0, i).str; + _data->code += " * " + width + ";\n"; + // mi_0 = select(wxh, mi_0, x_s >= 0); + _data->code += dst->scalar(0, i).str; + _data->code += " = select(-1, "; + _data->code += dst->scalar(0, i).str; + _data->code += ", "; + _data->code += x_s->scalar(0, i).str; + _data->code += " >= 0);\n"; + // mi_0 = select(wxh, mi_0, y_s >= 0); + _data->code += dst->scalar(0, i).str; + _data->code += " = select(-1, "; + _data->code += dst->scalar(0, i).str; + _data->code += ", "; + _data->code += y_s->scalar(0, i).str; + _data->code += " >= 0);\n"; + // mi_0 = select(wxh, mi_0, x_s < width); + _data->code += dst->scalar(0, i).str; + _data->code += " = select(-1, "; + _data->code += dst->scalar(0, i).str; + _data->code += ", "; + _data->code += x_s->scalar(0, i).str; + _data->code += " < "; + _data->code += width + ");\n"; + // mi_0 = select(wxh, mi_0, y_s < height); + _data->code += dst->scalar(0, i).str; + _data->code += " = select(-1, "; + _data->code += dst->scalar(0, i).str; + _data->code += ", "; + _data->code += y_s->scalar(0, i).str; + _data->code += " < "; + _data->code += height + ");\n"; + } + compound_statement_end(); + } + +private: + GpuKernelWriterDataHolder* _data { nullptr }; + GpuKernelWriterAttribute * _attr { nullptr }; +}; + +/** IGpuKernelWriter factory class */ +class GpuKernelWriterFactory final +{ +public: + /** Static method to call the IGpuKernelWriter class accordingly with the Gpu programming language + * + * @param[in] gpu GPU target + * + * @return IGpuKernelWriter + */ + static std::unique_ptr create(GpuKernelWriterAttribute *attr, GpuKernelWriterDataHolder *x) + { + switch(x->programming_language()) + { + case GpuTargetLanguage::OpenCL: + return std::make_unique(attr, x); + default: + std::cout << "Unsupported Gpu programming language" << std::endl; + assert(false); + return nullptr; + } + } +}; + +inline int32_t adjust_step(TensorSamplerFormat tensor_format, int32_t step, const TensorInfo *tensor_info_id, int32_t idx) +{ + auto tensor = tensor_info_id->shape; + + int32_t dim[3] = {0}; + + switch(tensor_format) + { + case TensorSamplerFormat::C_W_H: + dim[0] = tensor[0]; + dim[1] = tensor[1]; + dim[2] = tensor[2]; + break; + case TensorSamplerFormat::C_WH_1: + dim[0] = tensor[0]; + dim[1] = tensor[1] * tensor[2]; + dim[2] = 1; + break; + default: + std::cout << "Unsupported tensor format" << std::endl; + assert(false); + break; + } + + return std::min(step, dim[idx]); +} + +} // namespace prototype +} // namespace ckw + +#endif // CKW_PROTOTYPE_SRC_PROTOTYPE_H diff --git a/compute_kernel_writer/prototype/src/TensorInfo.cpp b/compute_kernel_writer/prototype/src/TensorInfo.cpp new file mode 100644 index 0000000000..561c126469 --- /dev/null +++ b/compute_kernel_writer/prototype/src/TensorInfo.cpp @@ -0,0 +1,77 @@ +/* + * Copyright (c) 2023 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ckw/TensorInfo.h" + +namespace ckw +{ +TensorInfo::TensorInfo(DataType dt, const TensorShape &shape, TensorDataLayout dl, int32_t id) + : _shape(shape), _dt(dt), _dl(dl), _id(id) +{ +} + +TensorInfo &TensorInfo::shape(const TensorShape &shape) +{ + _shape = shape; + return *this; +} + +TensorShape TensorInfo::shape() const +{ + return _shape; +} + +TensorInfo &TensorInfo::data_type(DataType dt) +{ + _dt = dt; + return *this; +} + +DataType TensorInfo::data_type() const +{ + return _dt; +} + +TensorInfo &TensorInfo::data_layout(TensorDataLayout dl) +{ + _dl = dl; + return *this; +} + +TensorDataLayout TensorInfo::data_layout() const +{ + return _dl; +} + +TensorInfo &TensorInfo::id(int32_t id) +{ + _id = id; + return *this; +} + +int32_t TensorInfo::id() const +{ + return _id; +} +} // namespace ckw diff --git a/compute_kernel_writer/prototype/src/TensorOperand.cpp b/compute_kernel_writer/prototype/src/TensorOperand.cpp new file mode 100644 index 0000000000..00ecc3824e --- /dev/null +++ b/compute_kernel_writer/prototype/src/TensorOperand.cpp @@ -0,0 +1,247 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "ckw/TensorOperand.h" +#include "ckw/Error.h" +#include "ckw/Kernel.h" +#include "ckw/TileOperand.h" +#include "src/Prototype.h" + +namespace ckw +{ + +namespace +{ + +inline TensorComponentOperand &get_or_create_component(std::unique_ptr &ptr, const ::std::string &name, TensorComponent component) +{ + if(ptr == nullptr) + { + ptr = std::make_unique(name, component); + } + + return *ptr; +} + +} // namespace + +// ================================================================================================= +// TensorOperand +// ================================================================================================= + +TensorOperand::TensorOperand(const std::string &name, const TensorInfo &info) + : OperandBase(name), _info(info) +{ +} + +prototype::Operand TensorOperand::create_impl_operand(prototype::IGpuKernelWriter *writer) const +{ + CKW_UNUSED(writer); + return { name() }; +} + +const TensorInfo &TensorOperand::info() const +{ + return _info; +} + +TensorInfo &TensorOperand::info() +{ + return _info; +} + +DataType TensorOperand::data_type() const +{ + return _info.data_type(); +} + +bool TensorOperand::is_constant() const +{ + return false; +} + +const TileOperand &TensorOperand::tile() const +{ + return *_tile; +} + +TileOperand &TensorOperand::tile() +{ + return *_tile; +} + +TensorOperand &TensorOperand::tile(TileOperand &tile) +{ + _tile = &tile; + return *this; +} + +const TensorTileSampler &TensorOperand::tile_sampler() const +{ + return _tile_sampler; +} + +TensorTileSampler &TensorOperand::tile_sampler() +{ + return _tile_sampler; +} + +TensorOperand &TensorOperand::tile_sampler(const TensorTileSampler &value) +{ + _tile_sampler = value; + return *this; +} + +TileOperand &TensorOperand::stride1() +{ + return get_or_create_component(_stride1, name(), TensorComponent::Stride1); +} + +TileOperand &TensorOperand::stride2() +{ + return get_or_create_component(_stride2, name(), TensorComponent::Stride2); +} + +TileOperand &TensorOperand::stride3() +{ + return get_or_create_component(_stride3, name(), TensorComponent::Stride3); +} + +TileOperand &TensorOperand::stride4() +{ + return get_or_create_component(_stride4, name(), TensorComponent::Stride4); +} + +TileOperand &TensorOperand::dim0() +{ + return get_or_create_component(_dim0, name(), TensorComponent::Dim0); +} + +TileOperand &TensorOperand::dim1() +{ + return get_or_create_component(_dim1, name(), TensorComponent::Dim1); +} + +TileOperand &TensorOperand::dim2() +{ + return get_or_create_component(_dim2, name(), TensorComponent::Dim2); +} + +TileOperand &TensorOperand::dim3() +{ + return get_or_create_component(_dim3, name(), TensorComponent::Dim3); +} + +TileOperand &TensorOperand::dim4() +{ + return get_or_create_component(_dim4, name(), TensorComponent::Dim4); +} + +TileOperand &TensorOperand::dim1_dim2() +{ + return get_or_create_component(_dim1_dim2, name(), TensorComponent::Dim1xDim2); +} + +TileOperand &TensorOperand::dim1_dim2_dim3() +{ + return get_or_create_component(_dim1_dim2_dim3, name(), TensorComponent::Dim1xDim2xDim3); +} + +TileOperand &TensorOperand::offset_first_element_in_bytes() +{ + return get_or_create_component(_offset_first_element_in_bytes, name(), TensorComponent::OffsetFirstElement); +} + +// ================================================================================================= +// TensorComponentOperand +// ================================================================================================= + +TensorComponentOperand::TensorComponentOperand(const 
::std::string &name, TensorComponent component) + : TileOperand(name, DataType::Int32), _component(component) +{ +} + +prototype::Operand TensorComponentOperand::create_impl_operand(prototype::IGpuKernelWriter *writer) const +{ + CKW_UNUSED(writer); + prototype::OperandType type{ prototype::OperandType::Unknown }; + + switch(_component) + { + case TensorComponent::OffsetFirstElement: + type = prototype::OperandType::TensorDataOffset; + break; + + case TensorComponent::Stride1: + type = prototype::OperandType::TensorStride1; + break; + + case TensorComponent::Stride2: + type = prototype::OperandType::TensorStride2; + break; + + case TensorComponent::Stride3: + type = prototype::OperandType::TensorStride3; + break; + + case TensorComponent::Stride4: + type = prototype::OperandType::TensorStride4; + break; + + case TensorComponent::Dim0: + type = prototype::OperandType::TensorDim0; + break; + + case TensorComponent::Dim1: + type = prototype::OperandType::TensorDim1; + break; + + case TensorComponent::Dim2: + type = prototype::OperandType::TensorDim2; + break; + + case TensorComponent::Dim3: + type = prototype::OperandType::TensorDim3; + break; + + case TensorComponent::Dim4: + type = prototype::OperandType::TensorDim4; + break; + + case TensorComponent::Dim1xDim2: + type = prototype::OperandType::TensorDim1xDim2; + break; + + case TensorComponent::Dim1xDim2xDim3: + type = prototype::OperandType::TensorDim1xDim2xDim3; + break; + + default: + CKW_ASSERT(false); + } + + return prototype::Operand(name(), type); +} + +} // namespace ckw diff --git a/compute_kernel_writer/prototype/src/TensorTileSampler.cpp b/compute_kernel_writer/prototype/src/TensorTileSampler.cpp new file mode 100644 index 0000000000..143d550dec --- /dev/null +++ b/compute_kernel_writer/prototype/src/TensorTileSampler.cpp @@ -0,0 +1,167 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "ckw/TensorTileSampler.h" +#include "ckw/TileOperand.h" +#include "ckw/Types.h" + +namespace ckw +{ + +TensorTileSampler::TensorTileSampler() +{ +} + +TensorTileSampler::TensorTileSampler( + TileOperand &x, TileOperand &y, TileOperand &z, TileOperand &b, + TensorSamplerFormat format, + TensorSamplerAddressModeX address_mode_x, + TensorSamplerAddressModeY address_mode_y, + TensorSamplerAddressModeZ address_mode_z) + : _x(&x), _y(&y), _z(&z), _b(&b), _height(0), _width(0), _format(format), _address_mode_x(address_mode_x), _address_mode_y(address_mode_y), _address_mode_z(address_mode_z) +{ +} + +TensorTileSampler::TensorTileSampler( + TileOperand &x, TileOperand &y, TileOperand &z, TileOperand &b, + int32_t height, int32_t width, + TensorSamplerFormat format, + TensorSamplerAddressModeX address_mode_x, + TensorSamplerAddressModeY address_mode_y, + TensorSamplerAddressModeZ address_mode_z) + : _x(&x), _y(&y), _z(&z), _b(&b), _height(height), _width(width), _format(format), _address_mode_x(address_mode_x), _address_mode_y(address_mode_y), _address_mode_z(address_mode_z) +{ +} + +const TileOperand &TensorTileSampler::x() const +{ + return *_x; +} + +TensorTileSampler &TensorTileSampler::x(TileOperand &x) +{ + _x = &x; + return *this; +} + +const TileOperand &TensorTileSampler::y() const +{ + return *_y; +} + +TensorTileSampler &TensorTileSampler::y(TileOperand &y) +{ + _y = &y; + return *this; +} + +const TileOperand &TensorTileSampler::z() const +{ + return *_z; +} + +TensorTileSampler &TensorTileSampler::z(TileOperand &z) +{ + _z = &z; + return *this; +} + +const TileOperand &TensorTileSampler::b() const +{ + return *_b; +} + +TensorTileSampler &TensorTileSampler::b(TileOperand &b) +{ + _b = &b; + return *this; +} + +int32_t TensorTileSampler::width() const +{ + return _width; +} + +TensorTileSampler &TensorTileSampler::width(int32_t width) +{ + _width = width; + return *this; +} + +int32_t TensorTileSampler::height() const +{ + return _height; +} + +TensorTileSampler &TensorTileSampler::height(int32_t height) +{ + _height = height; + return *this; +} + +TensorSamplerFormat TensorTileSampler::format() const +{ + return _format; +} + +TensorTileSampler &TensorTileSampler::format(TensorSamplerFormat format) +{ + _format = format; + return *this; +} + +TensorSamplerAddressModeX TensorTileSampler::address_mode_x() const +{ + return _address_mode_x; +} + +TensorTileSampler &TensorTileSampler::address_mode_x(TensorSamplerAddressModeX address_mode_x) +{ + _address_mode_x = address_mode_x; + return *this; +} + +TensorSamplerAddressModeY TensorTileSampler::address_mode_y() const +{ + return _address_mode_y; +} + +TensorTileSampler &TensorTileSampler::address_mode_y(TensorSamplerAddressModeY address_mode_y) +{ + _address_mode_y = address_mode_y; + return *this; +} + +TensorSamplerAddressModeZ TensorTileSampler::address_mode_z() const +{ + return _address_mode_z; +} + +TensorTileSampler &TensorTileSampler::address_mode_z(TensorSamplerAddressModeZ address_mode_z) +{ + _address_mode_z = address_mode_z; + return *this; +} + +} // namespace ckw diff --git a/compute_kernel_writer/prototype/src/TileInfo.cpp b/compute_kernel_writer/prototype/src/TileInfo.cpp new file mode 100644 index 0000000000..7d8b2654ef --- /dev/null +++ b/compute_kernel_writer/prototype/src/TileInfo.cpp @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2023 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "ckw/TileInfo.h" + +namespace ckw +{ +TileInfo::TileInfo(DataType dt) + : _dt(dt), _shape({{1, 1}}) +{ +} + +TileInfo::TileInfo(DataType dt, int32_t w) + : _dt(dt), _shape({{w, 1}}) +{ +} + +TileInfo::TileInfo(DataType dt, int32_t h, int32_t w) + : _dt(dt), _shape({{w, h}}) +{ +} + +TileInfo &TileInfo::width(int32_t w) +{ + _shape[kTileWidthIdx] = w; + return *this; +} + +int32_t TileInfo::width() const +{ + return _shape[kTileWidthIdx]; +} + +TileInfo &TileInfo::height(int32_t h) +{ + _shape[kTileHeightIdx] = h; + return *this; +} + +int32_t TileInfo::height() const +{ + return _shape[kTileHeightIdx]; +} + +TileInfo &TileInfo::data_type(DataType dt) +{ + _dt = dt; + return *this; +} + +DataType TileInfo::data_type() const +{ + return _dt; +} +} // namespace ckw diff --git a/compute_kernel_writer/prototype/src/TileOperand.cpp b/compute_kernel_writer/prototype/src/TileOperand.cpp new file mode 100644 index 0000000000..091947628d --- /dev/null +++ b/compute_kernel_writer/prototype/src/TileOperand.cpp @@ -0,0 +1,104 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "ckw/TileOperand.h" +#include "ckw/Error.h" +#include "src/Prototype.h" + +namespace ckw +{ + +TileOperand::TileOperand(const std::string &name, const TileInfo &info) + : OperandBase(name), _info(info), _value{ 0 }, _constant(false) +{ +} + +TileOperand::TileOperand(const std::string &name, DataType data_type) + : OperandBase(name), _info(TileInfo{ data_type }), _value(0), _constant(false) +{ +} + +TileOperand::TileOperand(const std::string &name, int32_t value) + : OperandBase(name), _info(TileInfo{ DataType::Int32 }), _value(value), _constant(true) +{ +} + +TileOperand::TileOperand(const std::string &name, float value) + : OperandBase(name), _info(TileInfo{ DataType::Fp32 }), _value(value), _constant(true) +{ +} + +prototype::Operand TileOperand::create_impl_operand(prototype::IGpuKernelWriter *writer) const +{ + CKW_UNUSED(writer); + + if(_constant) + { + switch(_info.data_type()) + { + case DataType::Int32: + return prototype::Operand(std::to_string(_value.get()), prototype::OperandType::ScalarInt32); + + case DataType::Fp32: + return prototype::Operand(std::to_string(_value.get()), prototype::OperandType::ScalarFp32); + + default: + CKW_ASSERT(false); + } + } + else + { + return prototype::Operand(name(), prototype::OperandType::Tile); + } +} + +const TileInfo &TileOperand::tile_info() const +{ + return _info; +} + +DataType TileOperand::data_type() const +{ + return _info.data_type(); +} + +bool TileOperand::is_constant() const +{ + return _constant; +} + +bool TileOperand::is_scalar() const +{ + return _info.width() == 1 && _info.height() == 1; +} + +ScalarValue TileOperand::scalar_value() const +{ + CKW_ASSERT(is_scalar()); + CKW_ASSERT(is_constant()); + + return _value; +} + +} // namespace ckw diff --git a/compute_kernel_writer/src/Kernel.cpp b/compute_kernel_writer/src/Kernel.cpp deleted file mode 100644 index bbf5c440a7..0000000000 --- a/compute_kernel_writer/src/Kernel.cpp +++ /dev/null @@ -1,61 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "ckw/Kernel.h" -#include "ckw/Types.h" -#include "src/Prototype.h" - -namespace ckw -{ - -Kernel::Kernel(const char *name, GpuTargetLanguage language) - : _name(name), _kernel(std::make_unique(language)), _operands{} -{ -} - -Kernel::~Kernel() -{ -} - -const std::string &Kernel::name() const -{ - return _name; -} - -const std::map> &Kernel::operands() const -{ - return _operands; -} - -std::map> &Kernel::operands() -{ - return _operands; -} - -prototype::GpuKernelWriterDataHolder *Kernel::impl() -{ - return _kernel.get(); -} - -} // namespace ckw diff --git a/compute_kernel_writer/src/KernelWriter.cpp b/compute_kernel_writer/src/KernelWriter.cpp deleted file mode 100644 index 5d79985e87..0000000000 --- a/compute_kernel_writer/src/KernelWriter.cpp +++ /dev/null @@ -1,227 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "ckw/KernelWriter.h" -#include "ckw/Error.h" -#include "ckw/TensorOperand.h" -#include "src/Prototype.h" - -#include - -namespace ckw -{ - -namespace -{ - -inline prototype::TensorInfo create_impl_tensor_info(const TensorInfo &info) -{ - return prototype::TensorInfo{ info.shape(), info.data_type(), info.data_layout(), info.id() }; -} - -} // namespace - -// ================================================================================================= -// Constructors and destructor -// ================================================================================================= - -KernelWriter::KernelWriter(Kernel &kernel) - : _kernel(&kernel), - _impl_attr(std::make_unique()), - _impl(prototype::GpuKernelWriterFactory::create(_impl_attr.get(), kernel.impl())) -{ - _impl->set_IdSpace(1); -} - -KernelWriter::~KernelWriter() -{ -} - -// ================================================================================================= -// Scope management -// ================================================================================================= - -int32_t KernelWriter::id_space() const -{ - return _id_space; -} - -KernelWriter &KernelWriter::id_space(int32_t id_space) -{ - CKW_ASSERT(id_space <= _max_id_space); - - _id_space = id_space; - return *this; -} - -int32_t KernelWriter::next_id_space() -{ - id_space(++_max_id_space); - return _id_space; -} - -// ================================================================================================= -// Tensor and tile declaration -// ================================================================================================= - -TensorOperand &KernelWriter::create_tensor_argument(const char *name, const TensorInfo &info) -{ - const auto var_name = generate_variable_name(name); - - _impl->declare_argument(var_name, create_impl_tensor_info(info)); - - auto operand = new TensorOperand(var_name, info); - register_operand(operand, false); - - return *operand; -} - -TileOperand &KernelWriter::create_tile_argument(const char *name, int32_t value) -{ - const auto var_name = generate_variable_name(name); - - auto operand = new TileOperand(var_name, value); - register_operand(operand, false); - - return *operand; -} - -std::string KernelWriter::generate_variable_name(const char *name) const -{ - std::stringstream var_name; - - var_name << "_" << _id_space << "_" << name; - - return var_name.str(); -} - -void KernelWriter::register_operand(OperandBase *operand, bool declaring) -{ - const auto &name = operand->name(); - auto &operands = _kernel->operands(); - - CKW_ASSERT(operands.find(name) == operands.end()); - operands[name] = std::unique_ptr(operand); - - if(declaring && !operand->is_constant()) - { - const auto tile = reinterpret_cast(operand); - - const auto &info = tile->tile_info(); - _impl->declare_tile(tile->name(), prototype::TileInfo(info.data_type(), info.width(), info.height())); - } -} - -// ================================================================================================= -// Load and store -// ================================================================================================= - -void KernelWriter::op_load(TileOperand &tile, TensorOperand &tensor, const TensorTileSampler &sampler) -{ - prototype::TensorOperand impl_tensor( - tensor.name(), - prototype::GpuSampler{ - sampler.format(), - prototype::GpuSamplerTensorStorage::BufferUint8Ptr, - sampler.address_mode_x(), - sampler.address_mode_y(), - sampler.address_mode_z() }); - - auto impl_x = 
sampler.x().create_impl_operand(_impl.get()); - auto impl_y = sampler.y().create_impl_operand(_impl.get()); - auto impl_z = sampler.z().create_impl_operand(_impl.get()); - auto impl_b = sampler.b().create_impl_operand(_impl.get()); - - auto impl_dst = tile.create_impl_operand(_impl.get()); - - _impl->op_load_immediate(impl_tensor, impl_dst, impl_x, impl_y, impl_z, impl_b); -} - -void KernelWriter::op_store(TensorOperand &tensor, const TileOperand &tile, const TensorTileSampler &sampler) -{ - prototype::TensorOperand impl_tensor( - tensor.name(), - prototype::GpuSampler{ - sampler.format(), - prototype::GpuSamplerTensorStorage::BufferUint8Ptr, - sampler.address_mode_x(), - sampler.address_mode_y(), - sampler.address_mode_z() }); - auto impl_src = tile.create_impl_operand(_impl.get()); - auto impl_x = sampler.x().create_impl_operand(_impl.get()); - auto impl_y = sampler.y().create_impl_operand(_impl.get()); - auto impl_z = sampler.z().create_impl_operand(_impl.get()); - auto impl_b = sampler.b().create_impl_operand(_impl.get()); - - _impl->op_store_immediate(impl_tensor, impl_src, impl_x, impl_y, impl_z, impl_b); -} - -// ================================================================================================= -// Data processing -// ================================================================================================= - -void KernelWriter::op_assign(TileOperand &dst, const TileOperand &src) -{ - auto impl_dst = dst.create_impl_operand(_impl.get()); - auto impl_src = src.create_impl_operand(_impl.get()); - - _impl->op_assign(impl_dst, impl_src); -} - -void KernelWriter::op_binary_expression(TileOperand &dst, const TileOperand &lhs, const TileOperand &rhs, BinaryOp op) -{ - auto impl_lhs = lhs.create_impl_operand(_impl.get()); - auto impl_rhs = rhs.create_impl_operand(_impl.get()); - auto impl_dst = dst.create_impl_operand(_impl.get()); - - _impl->op_binary_expression(impl_dst, impl_lhs, op, impl_rhs); -} - -void KernelWriter::op_scalar_function(TileOperand &dst, const TileOperand &src, ScalarUnaryFunction opcode) -{ - auto impl_dst = dst.create_impl_operand(_impl.get()); - auto impl_src = src.create_impl_operand(_impl.get()); - - _impl->op_scalar_function(impl_dst, impl_src, opcode); -} - -// ================================================================================================= -// Misc -// ================================================================================================= - -void KernelWriter::op_get_global_id(TileOperand &dst, int32_t dim) -{ - _impl->op_get_global_id(prototype::Operand(dst.name()), dim); -} - -// ================================================================================================= -// Code generation -// ================================================================================================= - -std::string KernelWriter::generate_code() -{ - return prototype::generate_code(*_kernel->impl(), _kernel->name()); -} - -} // namespace ckw diff --git a/compute_kernel_writer/src/OperandBase.cpp b/compute_kernel_writer/src/OperandBase.cpp deleted file mode 100644 index 59cf846cc7..0000000000 --- a/compute_kernel_writer/src/OperandBase.cpp +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "ckw/OperandBase.h" - -namespace ckw -{ - -OperandBase::OperandBase(const std::string &name) - : _name(name) -{ -} - -OperandBase::~OperandBase() -{ -} - -const std::string &OperandBase::name() const -{ - return _name; -} - -OperandBase &OperandBase::name(const std::string &name) -{ - _name = name; - return *this; -} - -} // namespace ckw diff --git a/compute_kernel_writer/src/Prototype.h b/compute_kernel_writer/src/Prototype.h deleted file mode 100644 index 45f1b3d464..0000000000 --- a/compute_kernel_writer/src/Prototype.h +++ /dev/null @@ -1,3767 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef CKW_SRC_PROTOTYPE_H -#define CKW_SRC_PROTOTYPE_H - -#include -#include -#include -#include // int32_t -#include // cout (to be removed) -#include // assert (to be removed) -#include -#include -#include -#include -#include -#include -#include - -#include "ckw/Types.h" -#include "ckw/TensorInfo.h" -#include "ckw/Error.h" - -namespace ckw -{ -namespace prototype { - -// Dummy data structure for Size2D -using Size2D = std::vector; - -// Dummy Status -using Status = void; - -enum class ComponentType : int32_t -{ - Complex = 0, - Simple = 1, - Unfusable = 2 -}; - -enum class GpuCompilationSpeed -{ - Fast = 0x00, // fast compilation may increase the latency of the network - Slow = 0x01 // slow compilation may decrease the latency of the network -}; - -enum class GpuExtensions -{ - Fp16, - Dot8, - Mmul, - FastMath -}; - -struct TensorInfo -{ - TensorShape shape { {0} }; - DataType data_type { DataType::Unknown }; - TensorDataLayout data_layout { TensorDataLayout::Nhwc }; - int32_t id { -1 }; -}; - -struct ComponentAttribute -{ - GpuCompilationSpeed compilation_speed {GpuCompilationSpeed::Fast}; - bool overwrite_tile { true }; -}; - -inline std::string data_type_to_cl_type(DataType dt) -{ - switch(dt) - { - case DataType::Fp32: - return "float"; - case DataType::Fp16: - return "half"; - case DataType::Int8: - return "char"; - case DataType::Uint8: - return "uchar"; - case DataType::Uint16: - return "ushort"; - case DataType::Int16: - return "short"; - case DataType::Uint32: - return "uint"; - case DataType::Int32: - return "int"; - case DataType::Bool: - return "bool"; - default: - assert(false); - return ""; - } -} - -inline int32_t width_to_cl_vector_size(int32_t width) -{ - switch(width) - { - case 1: - return 1; - case 2: - return 2; - case 3: - return 3; - case 4: - return 4; - case 5: - case 6: - case 7: - case 8: - return 8; - case 9: - case 10: - case 11: - case 12: - case 13: - case 14: - case 15: - case 16: - return 16; - default: - assert(false); - return 0; - } -} - -inline std::string get_cl_data_type(DataType dt, int32_t width) -{ - std::string data_type; - int32_t w = width_to_cl_vector_size(width); - data_type += data_type_to_cl_type(dt); - if(w != 1) - { - data_type += std::to_string(w); - } - return data_type; -} - -inline std::string to_opencl_store(int32_t vector_length) -{ - if(vector_length != 1) - { - return "vstore" + std::to_string(vector_length) + "("; - } - else - { - return "*("; - } -} - -struct TileInfo -{ - TileInfo() {} - TileInfo(DataType dt) : dt(dt), w(1), h(1) {} - TileInfo(DataType dt, int32_t width) : dt(dt), w(width), h(1) {} - TileInfo(DataType dt, int32_t width, int32_t height) : dt(dt), w(width), h(height) {} - DataType dt{ DataType::Unknown }; // Data type of the tile - int32_t w{ 0 }; // Width (i.e. c0 - portion of the channels) - int32_t h{ 0 }; // Height (i.e. s0 - portion of the spatial dimensions) -}; - -inline std::ostream& operator << (std::ostream& o, const TileInfo& a) -{ - o << a.w << " x " << a.h; - return o; -} - -struct DataTypeAsString -{ - std::string str { "" }; - DataType dt { DataType::Unknown }; - int32_t size { 1 }; -}; - -struct ValueAsString -{ - std::string str { "" }; - DataTypeAsString type { }; -}; - -// https://stackoverflow.com/questions/51515378/storing-and-accessing-tile-properties-in-c -// A Tile is a collection of variables used to express a 2D data. 
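A minimal usage sketch of the CL type helpers above, assuming the prototype header is reachable as "src/Prototype.h": data_type_to_cl_type() maps the DataType to an OpenCL base type and width_to_cl_vector_size() rounds the tile width up to the nearest legal vector size, so the expected strings are as commented.

#include <cassert>
#include <string>
#include "src/Prototype.h" // assumed include path for the header shown in this hunk

int main()
{
    using namespace ckw::prototype;

    // Scalars carry no width suffix; widths 5..8 round up to vector size 8.
    assert(get_cl_data_type(ckw::DataType::Fp16, 1) == "half");
    assert(get_cl_data_type(ckw::DataType::Fp32, 4) == "float4");
    assert(get_cl_data_type(ckw::DataType::Int32, 5) == "int8");
    return 0;
}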
-class IScalarTile -{ -public: - virtual ~IScalarTile() = default; - /** Method to get the scalar variable from a tile - * @param[in] x X coordinate on the width of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge - * @param[in] y Y coordinate on the height of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge - * - * @return the scalar variable as a string - */ - virtual ValueAsString scalar(int32_t x, int32_t y) const = 0; - /** Method to get the list of underlying variable names used by the tile - * - * @return the list of variable names - */ - virtual std::vector underlying_source_variables() const = 0; - /** Method to get the name of the tile. - * - * @return the name of the tile - */ - std::string name() const - { - return _basename; - } - /** Method to get the tile format - * - * @return the format - */ - TileInfo format() const - { - return _format; - } - /** Method to know whether the tile is assignable or not (constant) - * - * @return true if the tile is assignable - */ - virtual bool is_assignable() const = 0; - /** Method to know whether the tile needs to be declared - * - * @return true if the tile needs to be declared in the code before being used - */ - virtual bool need_declaration() const = 0; -protected: - TileInfo _format { }; // Tile format - std::string _basename { "" }; // Tile name -}; - -// A tile is a collection of variables used to express a 2D data. The variables are vectors in the GPU context. -// The vector size is given by the width of the tile. The number of vectors height by depth defines the number of vectors -class IVectorTile : public IScalarTile -{ -public: - virtual ~IVectorTile() = default; - /** Method to get the vector variable from a tile. A vector is an ordered homogeneous collection of two or more scalars. - * The user can query the list of supported width for the vectors through preferred_vector_sizes(). - * - * @param[in] y Y coordinate on the height of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge - * - * @return the vector variable as a string - */ - virtual ValueAsString vector(int32_t y) const = 0; - /** Method to get a vector variable from a tile. A vector is an ordered homogeneous collection of two or more scalars. - * - * @return the vector variable as a string - */ - virtual ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const = 0; - /** Method to get the preferred vector sizes. 
- * - * @return a vector with the preferred vector sizes - */ - //virtual std::vector preferred_vector_sizes() const = 0; -}; - -class ClTile : public IVectorTile -{ -public: - ClTile(const std::string& name, TileInfo format) - { - _format = format; - _basename = name; - } - - ValueAsString scalar(int32_t x, int32_t y) const override - { - x = std::max(std::min(x, _format.w - 1), static_cast(0)); - y = std::max(std::min(y, _format.h - 1), static_cast(0)); - - ValueAsString t; - t.str = build_variable_name(y); - t.type.str = get_cl_data_type(_format.dt, 1); - t.type.dt = _format.dt; - t.type.size = 1; - - // Check required because if the width has only one element, we cannot use .s0 - if(_format.w != 1) - { - // Automatic broadcasting - t.str += ".s" + std::to_string(x); - } - - return t; - } - - ValueAsString vector(int32_t y) const override - { - y = std::max(std::min(y, _format.h - 1), static_cast(0)); - - ValueAsString t; - t.str = build_variable_name(y); - t.type.str = get_cl_data_type(_format.dt, _format.w); - t.type.dt = _format.dt; - t.type.size = _format.w; - return t; - } - - ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const override - { - y = std::max(std::min(y, _format.h - 1), static_cast(0)); - - ValueAsString t; - t.str = build_variable_name(y); - t.type.str = get_cl_data_type(_format.dt, width); - t.type.dt = _format.dt; - t.type.size = width; - - if(_format.w != 1) - { - t.str += ".s"; - for(int i = 0; i < width; ++i) - { - t.str += to_scalar_hex(x_start + i); - } - } - return t; - } - - std::vector underlying_source_variables() const override - { - std::vector vars; - for(int32_t y = 0; y < _format.h; ++y) - { - ValueAsString t; - t.str = build_variable_name(y); - t.type.str = get_cl_data_type(_format.dt, _format.w); - t.type.dt = _format.dt; - t.type.size = _format.w; - vars.push_back(t); - } - return vars; - } - - bool is_assignable() const override - { - return true; - } - - bool need_declaration() const override - { - return true; - } - -private: - std::string build_variable_name(int32_t y) const - { - std::string var_name = _basename; - - if(_format.h == 1) - { - return var_name; - - } - else - { - var_name += "_"; - var_name += std::to_string(y); - } - - return var_name; - } - - std::string to_scalar_hex(int32_t x) const - { - switch(x) - { - case 0: - case 1: - case 2: - case 3: - case 4: - case 5: - case 6: - case 7: - case 8: - case 9: - return std::to_string(x); - case 10: - return "A"; - case 11: - return "B"; - case 12: - return "C"; - case 13: - return "D"; - case 14: - return "E"; - case 15: - return "F"; - default: - std::cout << "Unsupported hexadecimal value" << std::endl; - assert(false); - return ""; - } - } -}; - -// Unique features: It contains values in the form of string. The name used for this object is misleading since the variables can change the value over time. 
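The ClConstantTile that follows keeps its values as strings and renders them either as a single scalar or as an OpenCL compound literal; a minimal sketch of that behaviour, assuming the header is reachable as "src/Prototype.h":

#include <iostream>
#include "src/Prototype.h" // assumed include path for the header shown in this hunk

int main()
{
    using namespace ckw::prototype;

    // A 1x2 tile of Int32 constants, stored as strings.
    ClConstantTile constants({ { "0", "1" } }, ckw::DataType::Int32);

    std::cout << constants.scalar(0, 0).str << "\n"; // 0
    std::cout << constants.vector(0).str << "\n";    // ((int2)(0, 1))
    return 0;
}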
-class ClConstantTile : public IVectorTile -{ -public: - ClConstantTile(const std::vector> &in, DataType dt) - { - _format.w = in[0].size(); - _format.h = in.size(); - _format.dt = dt; - - _data = std::vector>(_format.h, std::vector(_format.w)); - - for(int32_t y = 0; y < _format.h; ++y) - { - for(int32_t x = 0; x < _format.w; ++x) - { - _data[y][x] = in[y][x]; - } - } - } - - ValueAsString scalar(int32_t x, int32_t y) const override - { - x = std::max(std::min(x, _format.w - 1), static_cast(0)); - y = std::max(std::min(y, _format.h - 1), static_cast(0)); - - ValueAsString t; - t.str = _data[y][x]; - t.type.str = get_cl_data_type(_format.dt, 1); - t.type.dt = _format.dt; - t.type.size = 1; - - return t; - } - - ValueAsString vector(int32_t y) const override - { - y = std::max(std::min(y, _format.h - 1), static_cast(0)); - - return vector(0, _format.w, y); - } - - ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const override - { - y = std::max(std::min(y, _format.h - 1), static_cast(0)); - - ValueAsString t; - t.str = ""; - t.type.str = get_cl_data_type(_format.dt, width); - t.type.dt = _format.dt; - t.type.size = width; - - if(width > 1) - { - t.str += "((" + get_cl_data_type(_format.dt, width) + ")("; - } - - int32_t x = x_start; - for(; x < width - 1; ++x) - { - t.str += scalar(x, y).str; - t.str += ", "; - } - t.str += scalar(x, y).str; - - if(width > 1) - { - t.str += "))"; - } - - return t; - } - - std::vector underlying_source_variables() const override - { - std::vector vars; - - for(int32_t y = 0; y < _format.h; ++y) - { - for(int32_t x = 0; x < _format.w; ++x) - { - ValueAsString t; - t.str = _data[y][x]; - t.type.str = get_cl_data_type(_format.dt, 1); - t.type.dt = _format.dt; - t.type.size = 1; - vars.push_back(t); - } - } - - return vars; - } - - bool is_assignable() const override - { - return false; - } - - bool need_declaration() const override - { - return false; - } - -private: - std::vector> _data{}; -}; - -enum class TensorComponentIndex : int32_t -{ - IndexMask = 0x0000000f, -}; - -enum class TensorComponentType : int32_t -{ - OffsetFirstElement = 0x00000100, - Stride = 0x00001000, - Dimension = 0x00010000, - FoldedDimension = 0x00100000, - Constant = 0x01000000 -}; - -enum class TensorComponent : int32_t -{ - Unknown = 0x00000000, - OffsetFirstElement = 0x00000100, - Stride1 = 0x00001001, - Stride2 = 0x00001002, - Stride3 = 0x00001003, - Stride4 = 0x00001004, - Dim0 = 0x00010000, - Dim1 = 0x00010001, - Dim2 = 0x00010002, - Dim3 = 0x00010003, - Dim4 = 0x00010004, - C = 0x00010000, // Dim0 - W = 0x00010001, // Dim1 - H = 0x00010002, // Dim2 - D = 0x00010003, - N = 0x00010004, - Dim1xDim2 = 0x00100021, - Dim1xDim2xDim3 = 0x00100321, - WxH = 0x00100021, - WxHxD = 0x00100321 -}; - -inline std::string to_string(TensorComponent x) -{ - switch(x) - { - case TensorComponent::Unknown: - return "Unknown"; - case TensorComponent::OffsetFirstElement: - return "OffsetFirstElement"; - case TensorComponent::Stride1: - return "Stride1"; - case TensorComponent::Stride2: - return "Stride2"; - case TensorComponent::Stride3: - return "Stride3"; - case TensorComponent::Stride4: - return "Stride4"; - case TensorComponent::Dim0: - return "Dim0"; - case TensorComponent::Dim1: - return "Dim1"; - case TensorComponent::Dim2: - return "Dim2"; - case TensorComponent::Dim3: - return "Dim3"; - case TensorComponent::Dim4: - return "Dim4"; - case TensorComponent::Dim1xDim2: - return "Dim1xDim2"; - case TensorComponent::Dim1xDim2xDim3: - return "Dim1xDim2xDim3"; - default: - 
assert(false); - } -} - -class ITensorArgument -{ -public: - virtual ~ITensorArgument() = default; - /** Method to get the tensor component as a string - * - * @param[in] x tensor component to query - * - * @return the tensor component as a string - */ - virtual std::string component(TensorComponent x) = 0; - /** Method to get the tensor component type declaration as a string - * - * @return the tensor component type declaration as a string - */ - virtual std::string component_type_declaration() const = 0; - /** Method to get the tensor component data type - * - * @return the tensor component data type - */ - virtual DataType component_data_type() const = 0; - /** Method to get the tensor component declarations - * - * @return a vector containing the tensor component declarations - */ - virtual std::vector component_declarations() const = 0; - /** Method to get the name of the tensor argument. - * - * @return the name of the tensor argument - */ - std::string name() const - { - return _basename; - } - /** Method to get the tensor format - * - * @return the format - */ - TensorInfo format() const - { - return _format; - } - -protected: - TensorInfo _format { }; - std::string _basename {}; -}; - -enum class GpuTensorStorage : int32_t -{ - Unknown = 0x0000, - BufferUint8Ptr = 0x0012, - Image2dReadOnly = 0x0020, - Image2dWriteOnly = 0x0021, - Image3dReadOnly = 0x0030, - Image3dWriteOnly = 0x0031 -}; - -class IGpuTensorArgument : public ITensorArgument -{ -public: - virtual ~IGpuTensorArgument() = default; - /** Method to get the tensor storage, which is the underlying storage used to keep the data memory - * - * @param[in] x tensor storage to query - * - * @return the tensor storage as a string - */ - virtual std::string storage(GpuTensorStorage x) = 0; - /** Method to get the tensor storage type declaration as a string - * - * @param[in] x tensor component to query - * - * @return the tensor storage type declaration as a string - */ - virtual std::string storage_type_declaration(GpuTensorStorage x) const = 0; - /** Method to get the tensor storage declarations - * - * @return a vector containing the tensor storage declarations - */ - virtual std::vector storage_declarations() const = 0; -}; - -class ClTensorArgument : public IGpuTensorArgument -{ -public: - ClTensorArgument(const std::string& name, const TensorInfo& x, bool return_by_value_when_possible) - { - _basename = name; - _format = x; - _return_by_value_when_possible = return_by_value_when_possible; - } - - // Methods to override - std::string component(TensorComponent x) override - { - if((static_cast(x) & static_cast(TensorComponentType::Constant))) - { - int32_t idx = static_cast(x) & static_cast(TensorComponentIndex::IndexMask); - return std::to_string(idx - 1); - } - - if(_return_by_value_when_possible) - { - if((static_cast(x) & static_cast(TensorComponentType::Dimension))) - { - int32_t idx = static_cast(x) & static_cast(TensorComponentIndex::IndexMask); - return std::to_string(_format.shape[idx]); - } - - if((static_cast(x) & static_cast(TensorComponentType::FoldedDimension))) - { - switch(x) - { - case TensorComponent::Dim1xDim2: - return std::to_string(_format.shape[1] * _format.shape[2]); - case TensorComponent::Dim1xDim2xDim3: - return std::to_string(_format.shape[1] * _format.shape[2] * _format.shape[2]); - default: - std::cout << "Unsupported folded dimension" << std::endl; - assert(false); - } - } - } - - if(std::find(_components_required.begin(), _components_required.end(), x) == _components_required.end()) - { - 
_components_required.push_back(x); - } - - return build_component_name(x); - } - - std::string component_type_declaration() const override - { - return "int"; - }; - - DataType component_data_type() const override - { - return DataType::Int32; - } - - std::string storage(GpuTensorStorage x) override - { - if(std::find(_storage_required.begin(), _storage_required.end(), x) == _storage_required.end()) - { - _storage_required.push_back(x); - } - - return build_storage_name(x); - } - - std::string storage_type_declaration(GpuTensorStorage x) const override - { - switch(x) - { - case GpuTensorStorage::BufferUint8Ptr: - return "__global uchar*"; - case GpuTensorStorage::Image2dReadOnly: - return "__read_only image2d_t"; - case GpuTensorStorage::Image2dWriteOnly: - return "__write_only image2d_t"; - case GpuTensorStorage::Image3dReadOnly: - return "__read_only image3d_t "; - case GpuTensorStorage::Image3dWriteOnly: - return "__write_only image3d_t "; - default: - std::cout << "Unsupported storage" << std::endl; - assert(false); - return ""; - } - }; - - std::vector storage_declarations() const override - { - return _storage_required; - } - - std::vector component_declarations() const override - { - return _components_required; - } - -private: - std::string build_storage_name(GpuTensorStorage x) const - { - std::string var_name = _basename; - - switch(x) - { - case GpuTensorStorage::BufferUint8Ptr: - return var_name + "_ptr"; - case GpuTensorStorage::Image2dReadOnly: - case GpuTensorStorage::Image2dWriteOnly: - return var_name + "_img2d"; - case GpuTensorStorage::Image3dReadOnly: - case GpuTensorStorage::Image3dWriteOnly: - return var_name + "_img3d"; - default: - std::cout << "Unsupported storage" << std::endl; - assert(false); - } - - return var_name; - } - - std::string build_component_name(TensorComponent x) const - { - std::string var_name = _basename; - - switch(x) - { - case TensorComponent::OffsetFirstElement: - return var_name + "_offset_first_element"; - case TensorComponent::Stride1: - return var_name + "_stride1"; - case TensorComponent::Stride2: - return var_name + "_stride2"; - case TensorComponent::Stride3: - return var_name + "_stride3"; - case TensorComponent::Dim0: - return var_name + "_dim0"; - case TensorComponent::Dim1: - return var_name + "_dim1"; - case TensorComponent::Dim2: - return var_name + "_dim2"; - case TensorComponent::Dim3: - return var_name + "_dim3"; - case TensorComponent::Dim1xDim2: - return var_name + "_dim1xdim2"; - case TensorComponent::Dim1xDim2xDim3: - return var_name + "_dim1xdim2xdim3"; - default: - std::cout << "Unsupported component" << std::endl; - assert(false); - } - - return var_name; - } - - bool _return_by_value_when_possible { false }; - std::vector _storage_required {}; - std::vector _components_required {}; -}; - -/** - * @brief Data structure that contains the declared tiles by the components. - * The registry is a linear data structure that follows the similar principle of the stack. The user can use the @p increment_registry_level() method to - * increase the level of the stack (0 when it starts). When the user uses the @p decrement_registry_level() method, the registry decreases the level of the stack - * and remove (pop) all the tiles from the level above. - * When a tile is declared on the level 0, it is a global tile. A global tile is visible in all parts of the code. 
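For reference, a minimal sketch of the TensorComponent bit-encoding used by ClTensorArgument::component() above: the low nibble carries the index and the upper bits carry the component type, so Stride1 is the Stride flag plus index 1 and Dim2 is the Dimension flag plus index 2; when a component cannot be folded into a literal it becomes an int kernel argument named after the tensor. Enumerator values are copied from the enums above; the helper itself is hypothetical.

    #include <cstdint>
    #include <iostream>
    #include <string>

    // Values copied from TensorComponentIndex / TensorComponentType / TensorComponent above.
    constexpr int32_t kIndexMask     = 0x0000000f;
    constexpr int32_t kStrideFlag    = 0x00001000;
    constexpr int32_t kDimensionFlag = 0x00010000;
    constexpr int32_t kStride1       = 0x00001001; // Stride    | index 1
    constexpr int32_t kDim2          = 0x00010002; // Dimension | index 2

    // Hypothetical helper mirroring ClTensorArgument::build_component_name().
    std::string component_argument_name(const std::string &tensor, int32_t component)
    {
        const int32_t idx = component & kIndexMask;
        if((component & kStrideFlag) != 0)
        {
            return tensor + "_stride" + std::to_string(idx);
        }
        if((component & kDimensionFlag) != 0)
        {
            return tensor + "_dim" + std::to_string(idx);
        }
        return tensor + "_offset_first_element";
    }

    int main()
    {
        std::cout << component_argument_name("src", kStride1) << "\n"; // src_stride1
        std::cout << component_argument_name("src", kDim2) << "\n";    // src_dim2
    }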
- * Since different components may use the same name to define a tile, the registry adopts the IdSpace concept, an @p id to prevent name collisions - * when declaring tiles among different components. - * - */ -class GpuTileRegistry -{ -public: -enum class RegistryTileType -{ - Tile, - Link -}; - -using RegistryIdSpace = int32_t; -using RegistryLevel = int32_t; -using RegistryTileName = std::string; - -struct RegistryTileTableEntry -{ - RegistryLevel registry_level { 0 }; - std::unique_ptr tile_object { nullptr }; -}; - -struct RegistryTileTypeTableEntry -{ - RegistryTileType tile_type { RegistryTileType::Tile }; - RegistryTileName tile_name {}; - RegistryIdSpace registry_idspace { 0 }; - RegistryLevel registry_level { 0 }; -}; - -using RegistryTileTable = std::map>; -using RegistryTileTypeTable = std::map>; - /** - * @brief Construct a new Gpu Tile Registry object - * - */ - GpuTileRegistry() - { - _language = GpuTargetLanguage::Unknown; - } - /** - * @brief Construct a new Gpu Tile Registry object providing the Gpu programming language - * - * @param[in] language Gpu programming language to use - */ - GpuTileRegistry(GpuTargetLanguage language) - { - _language = language; - } - /** - * @brief Default destructor. Destroy the Gpu Tile Registry object - * - */ - ~GpuTileRegistry() = default; - /** - * @brief Set the working IdSpace for the tile registry. IdSpace is used to prevent name collisions when declaring tiles. - * Therefore, the IdSpace should be set before declaring any tiles. - * - * @param[in] id The IdSpace id - */ - void set_IdSpace(int32_t id) - { - _IdSpace = id; - } - /** - * @brief Get the current working IdSpace for the tile registry. IdSpace is used to prevent name collisions when declaring tiles - * - * @return The IdSpace id - */ - int32_t IdSpace() const - { - return _IdSpace; - } - /** - * @brief Gets all the IdSpace declarations defined in the tile registry. - * - * @return all the IdSpace declarations defined in the tile registry as std::vector. It returns an empty vector if there are no IdSpace declarations. - */ - std::vector IdSpace_declarations() const - { - std::vector x; - - auto it = _frags.begin(); - - while (it != _frags.end()) - { - x.push_back(it->first); - - it++; - } - - return x; - } - /** - * @brief Declare a tile from a previously created tile - */ - void insert(const std::string& name, const IVectorTile *frag) - { - assert(_language == GpuTargetLanguage::OpenCL); - const int32_t key_IdSpace = _IdSpace; - const std::string key_var_name = name; - const std::string var_name = frag->name(); - TileInfo format = frag->format(); - - // First check whether a tile with the same name exists - IVectorTile *result = (*this)[key_var_name]; - assert(result == nullptr); - if(result == nullptr) - { - std::unique_ptr tile = std::make_unique(var_name, format); - - _frags[key_IdSpace][key_var_name].tile_object = std::move(tile); - _frags[key_IdSpace][key_var_name].registry_level = _registry_level; - - _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Link; - _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name; - _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace; - _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level; - } - } - /** - * @brief Declare a tile with TileInfo. The tile will be stored in the IdSpace set with @p set_IdSpace() - * - * @note The reference name used for declaring the tile should not be previously used in the IdSpace - * - * @param[in] name Reference name for the tile. 
The reference name can be used to retrieve the tile stored in the registry. - * @param[in] format Tile format use to use - */ - void insert(const std::string& name, const TileInfo& format) - { - assert(_language == GpuTargetLanguage::OpenCL); - const int32_t key_IdSpace = _IdSpace; - const std::string key_var_name = name; - const std::string var_name = generate_tile_name(name); - - // First check whether a tile with the same name exists - IVectorTile *result = (*this)[key_var_name]; - assert(result == nullptr); - if(result == nullptr) - { - std::unique_ptr tile = std::make_unique(var_name, format); - _frags[key_IdSpace][key_var_name].tile_object = std::move(tile); - _frags[key_IdSpace][key_var_name].registry_level = _registry_level; - - _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile; - _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name; - _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace; - _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level; - } - } - /** - * @brief Declare a constant tile. The content of the tile is passed as a vector of std::string - * - * @note The reference name used for declaring the tile should not be previously used in the IdSpace - * - * @param[in] name Reference name for the tile. The reference name can be used to retrieve the tile stored in the registry. - * @param[in] in A 3D std::vector of std::string. From the 3D std::vector we can know the dimensions for the tile - * @param[in] dt The data type for the elements stored in the 3D std::vector as std::string. It is user's responsibilty to ensure - * that the data type is aligned with the content of the std::string. - */ - void insert(const std::string& name, const std::vector>& in, DataType dt) - { - assert(_language == GpuTargetLanguage::OpenCL); - const int32_t key_IdSpace = _IdSpace; - const std::string key_var_name = name; - - // First check whether a tile with the same name exists - IVectorTile *result = (*this)[key_var_name]; - assert(result == nullptr); - if(result == nullptr) - { - std::unique_ptr tile = std::make_unique(in, dt); - _frags[key_IdSpace][key_var_name].tile_object = std::move(tile); - _frags[key_IdSpace][key_var_name].registry_level = _registry_level; - - _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile; - _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name; - _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace; - _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level; - } - } - /** - * @brief Declare an anonymous constant tile. The content of the tile is passed as a vector of std::string - * - * @note This method can be used to declare temporary tiles that need to be accessed only once. - * - * @param[in] in A 3D std::vector of std::string. From the 3D std::vector we can know the dimensions for the tile - * @param[in] dt The data type for the elements stored in the 3D std::vector as std::string. It is user responsibilty to ensure - * that the data type is aligned with what passed with the std::string. 
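As an illustration of what these constant-tile overloads declare, a ClConstantTile keeps its values as strings and renders a row as an OpenCL vector literal rather than a named variable, which is why it needs no declaration and is not assignable. A hypothetical sketch of that rendering, assuming the OpenCL type name is formed as <type><width>:

    #include <cstdint>
    #include <iostream>
    #include <string>
    #include <vector>

    // Hypothetical helper mirroring ClConstantTile::vector(): a row of string values becomes
    // either the single literal itself or a vector literal such as ((float4)(a, b, c, d)).
    std::string constant_row_literal(const std::vector<std::string> &row, const std::string &cl_type)
    {
        if(row.size() == 1)
        {
            return row[0];
        }

        std::string out = "((" + cl_type + std::to_string(row.size()) + ")(";
        for(size_t i = 0; i < row.size(); ++i)
        {
            out += row[i];
            if(i + 1 < row.size())
            {
                out += ", ";
            }
        }
        out += "))";
        return out;
    }

    int main()
    {
        // Prints ((float4)(1.0f, 2.0f, 3.0f, 4.0f))
        std::cout << constant_row_literal({"1.0f", "2.0f", "3.0f", "4.0f"}, "float") << "\n";
    }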
- * - * @return IVectorTile* the anonymous constant tile - */ - IVectorTile* insert(const std::vector>& in, DataType dt) - { - assert(_language == GpuTargetLanguage::OpenCL); - const int32_t key_IdSpace = _IdSpace; - const std::string key_var_name = "_" + std::to_string(_anonymous_frag_count++); - - // First check whether a tile with the same name exists - IVectorTile *result = (*this)[key_var_name]; - assert(result == nullptr); - if(result == nullptr) - { - std::unique_ptr tile = std::make_unique(in, dt); - _frags[key_IdSpace][key_var_name].tile_object = std::move(tile); - _frags[key_IdSpace][key_var_name].registry_level = _registry_level; - - _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile; - _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name; - _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace; - _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level; - } - - return (*this)[key_var_name]; - } - /** - * @brief Get the tile from the registry. This method searches the tile in the IdSpace provided by the user - * - * @param[in] name The name of the tile to retrieve - * @param[in] IdSpace The IdSpace id where to search the tile - * - * @return IVectorTile* The tile - */ - IVectorTile* get(const std::string& name, int32_t IdSpace) - { - const int32_t key_IdSpace = IdSpace; - const std::string key_var_name = name; - - IVectorTile* result = nullptr; - auto search_IdSpace = _frags.find(key_IdSpace); - if(search_IdSpace != _frags.end()) - { - auto search_tile = _frags[key_IdSpace].find(key_var_name); - if(search_tile != _frags[key_IdSpace].end()) - { - result = search_tile->second.tile_object.get(); - assert(result != nullptr); - } - } - - return result; - } - /** - * @brief Get the tile from the registry. This method searches the tile in the IdSpace set with @p set_IdSpace() - * - * @param[in] name The name of the tile to retrieve - * - * @return IVectorTile* The tile - */ - IVectorTile* operator[](const std::string& name) - { - return get(name, _IdSpace); - } - /** - * @brief Check whether the tile in the in the IdSpace provided by the user exists - * - * @param[in] name Name of the tile to search for - * @param[in] IdSpace The IdSpace id where to search the tile - * - * @return true if the tile exists - * @return false if the tile does not exist - */ - bool has_tile(const std::string& name, int32_t IdSpace) const - { - const int32_t key_IdSpace = IdSpace; - const std::string key_var_name = name; - - // IVectorTile* result = nullptr; - auto search_IdSpace = _frags.find(key_IdSpace); - - return search_IdSpace != _frags.end(); - } - /** - * @brief Check whether the tile within the current IdSpace exists - * - * @param[in] name Name of the tile to search for - * - * @return true if the tile exists - * @return false if the tile does not exist - */ - bool has_tile(const std::string& name) const - { - return has_tile(name, _IdSpace); - } - /** - * @brief Get all the tiles declared within the IdSpace provided by the user - * - * @param[in] IdSpace IdSpace where to retrieve all the declared tiles - * - * @return std::vector A vector with all the declared tiles in the IdSpace provided by the user - */ - std::vector tile_declarations(int32_t IdSpace) - { - std::vector tiles; - - std::map::iterator it = _frag_types[IdSpace].begin(); - - while (it != _frag_types[IdSpace].end()) - { - // The following line should be enabled. However, we cannot at this stage - // because it used to retrieve the output tile produced by each component. 
- // However, this method should NOT be used to retrieve the output tile - //if(it->second.tile_type == RegistryTileType::Tile) - { - tiles.push_back(get(it->second.tile_name, it->second.registry_idspace)); - } - it++; - } - - return tiles; - } - /** - * @brief Increase the level of stack. - * - */ - void increment_registry_level() - { - _registry_level++; - } - /** - * @brief Remove all the tiles declared at the current stack level and decrease the level of the stack. - * - */ - void decrement_registry_level() - { - assert(_registry_level >= 0); - - // Remove all variables in the local scope - std::map::iterator it = _frags[_IdSpace].begin(); - - while (it != _frags[_IdSpace].end()) - { - if (it->second.registry_level == _registry_level) - { - it = _frags[_IdSpace].erase(it); - } - else - { - it++; - } - } - - std::map::iterator it_type = _frag_types[_IdSpace].begin(); - - while (it_type != _frag_types[_IdSpace].end()) - { - if (it_type->second.registry_level == _registry_level) - { - it_type = _frag_types[_IdSpace].erase(it_type); - } - else - { - it_type++; - } - } - - _registry_level--; - } - /** - * @brief Get the level of the stack - * - */ - int32_t level() const - { - return _registry_level; - } - -private: - // This method ensures that the key is unique among different components - std::string generate_tile_name(const std::string& name) - { - assert(_IdSpace >= 0 ); - if(_registry_level == 0) - { - return "_G" + std::to_string(_IdSpace) + "_" + name; - } - else - { - return name; - } - } - RegistryTileTable _frags {}; - RegistryTileTypeTable _frag_types {}; - RegistryLevel _registry_level { 0 }; - RegistryIdSpace _IdSpace { -1 }; - int32_t _anonymous_frag_count { 0 }; // Counter used to create the anonymous tiles - GpuTargetLanguage _language { GpuTargetLanguage::Unknown }; // Gpu programming language -}; - -using TensorEntry = std::unique_ptr; - -/** - * @brief Data structure that contains the tensors consumed by the components. - * Since different components may use the same name as reference for a tensor, the registry adopts the IdSpace concept, an @p id to prevent name collisions - * when declaring tensors among different components. - * - */ -class GpuTensorArgumentRegistry -{ -public: - /** - * @brief Construct a new Gpu Tensor Registry object - * - */ - GpuTensorArgumentRegistry() - { - _language = GpuTargetLanguage::Unknown; - } - /** - * @brief Construct a new Gpu Tensor Registry object - * - * @param[in] language Gpu programming language to use - */ - GpuTensorArgumentRegistry(GpuTargetLanguage language) - { - _language = language; - } - /** - * @brief Default destructor. Destroy the Gpu Tensor Registry object - * - */ - ~GpuTensorArgumentRegistry() = default; - /** - * @brief Set the working IdSpace for the tensor registry. IdSpace is used to prevent name collisions when declaring tensors. - * Therefore, the IdSpace should be set before declaring any tensors. - * - * @param[in] id The IdSpace id - */ - void set_IdSpace(int32_t id) - { - _IdSpace = id; - } - /** - * @brief Get the current working IdSpace for the tensor registry. IdSpace is used to prevent name collisions when declaring tensors - * - * @return The IdSpace id - */ - int32_t IdSpace() const - { - return _IdSpace; - } - /** - * @brief Gets all the IdSpace declarations defined in the tensor registry. - * - * @return all the IdSpace declarations defined in the tensor registry as std::vector. It returns an empty vector if there are no IdSpace declarations. 
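A short usage sketch of the registry above (it assumes the declarations in this file are in scope and that TileInfo is the aggregate with public dt/w/h members used throughout Prototype.h): tiles declared at registry level 0 receive the global "_G<IdSpace>_" prefix from generate_tile_name(), while tiles declared after increment_registry_level() are temporaries that decrement_registry_level() removes again.

    #include <cassert>

    void tile_registry_usage_sketch()
    {
        GpuTileRegistry tiles(GpuTargetLanguage::OpenCL);
        tiles.set_IdSpace(3); // must be set before declaring tiles: entries are keyed per IdSpace

        TileInfo acc_info{};  // assumed aggregate; fields as used by ClTile above
        acc_info.dt = DataType::Fp32;
        acc_info.w  = 4;
        acc_info.h  = 2;

        // Declared at level 0, so the underlying variables are named _G3_acc_0 and _G3_acc_1.
        tiles.insert("acc", acc_info);
        assert(tiles.has_tile("acc"));

        tiles.increment_registry_level();                              // open a scope for temporaries
        IVectorTile *half = tiles.insert({{"0.5f"}}, DataType::Fp32);  // anonymous constant tile
        (void)half;
        tiles.decrement_registry_level();                              // pops every tile declared at this level
    }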
- */ - std::vector IdSpace_declarations() const - { - std::vector x; - - auto it = _refs.begin(); - - while (it != _refs.end()) - { - x.push_back(it->first); - - it++; - } - - return x; - } - /** - * @brief Declare a tensor with TensorInfo. The tensor will be stored in the IdSpace set with @p set_IdSpace() - * - * @note The reference name used for declaring the tensor should not be previously used in the IdSpace - * - * @param[in] name Reference name for the tensor. The reference name can be used to retrieve the tensor stored in the registry. - * @param[in] x Pair of tensor info and tensor id - * @param[in] return_by_value_when_possible True if we want the value stored in the tensor components - */ - void insert(const std::string& name, const TensorInfo& x, bool return_by_value_when_possible) - { - assert(_language == GpuTargetLanguage::OpenCL); - const int32_t key_IdSpace = _IdSpace; - const int32_t tensor_id = x.id; - const std::string key_var_name = name; - const std::string var_name = generate_tensor_name(name, tensor_id); - - // First, check whether the tensor has already a reference. If so, trigger an assert - assert(!has_tensor_argument(name)); - - // Check whether a tensor with that tensorID exists - auto result = _tensor_arguments.find(tensor_id); - if(result == _tensor_arguments.end()) - { - // It means that we haven't added a tensor with that tensor_id yet. Create a IGpuTensorArgument before creating the reference - std::unique_ptr arg = std::make_unique(var_name, x, return_by_value_when_possible); - _tensor_arguments[tensor_id] = std::move(arg); - } - - _refs[key_IdSpace][key_var_name] = tensor_id; - } - /** - * @brief Get the tensor from the registry. This method searches the tensor in the IdSpace set with @p set_IdSpace() - * - * @param[in] name The name of the tensor to retrieve - * - * @return IGpuTensor* The tensor - */ - IGpuTensorArgument* operator[](const std::string& name) - { - const int32_t key_IdSpace = _IdSpace; - const std::string key_var_name = name; - - IGpuTensorArgument* result = nullptr; - auto search_IdSpace = _refs.find(key_IdSpace); - if(search_IdSpace != _refs.end()) - { - auto search_tensor_id = _refs[key_IdSpace].find(key_var_name); - - if(search_tensor_id != _refs[key_IdSpace].end()) - { - const int32_t tensor_id = search_tensor_id->second; - auto search_tensor_argument = _tensor_arguments.find(tensor_id); - if(search_tensor_argument != _tensor_arguments.end()) - { - result = search_tensor_argument->second.get(); - } - assert(result != nullptr); - } - } - - return result; - } - /** - * @brief Get all the tensors declared in the IdSpace provided by the user - * - * @return std::vector A vector with all the declared tensors - */ - std::vector tensor_argument_declarations() - { - std::vector args; - - auto it = _tensor_arguments.begin(); - - while (it != _tensor_arguments.end()) - { - args.push_back(it->second.get()); - it++; - } - - return args; - } - /** - * @brief Check whether the tensor argument in the IdSpace set with @p set_IdSpace() exists - * - * @param[in] name Name of the tensor argument to search for - * - * @return true if the tensor argument exists - * @return false if the tensor argument does not exist - */ - bool has_tensor_argument(const std::string& name) - { - const int32_t key_IdSpace = _IdSpace; - const std::string key_var_name = name; - - auto search_IdSpace = _refs.find(key_IdSpace); - - if(search_IdSpace != _refs.end()) - { - auto search_tensor_id = _refs[key_IdSpace].find(key_var_name); - - return search_tensor_id != 
_refs[key_IdSpace].end(); - } - else - { - return false; - } - } - /** - * @brief Check whether the tensor argument is in the the IdSpace provided by the user - * - * @param[in] name Name of the tensor argument to search for - * @param[in] IdSpace The IdSpace id where to search the tensor argument - * - * @return true if the tile exists - * @return false if the tile does not exist - */ - bool has_tensor_argument(const std::string& name, int32_t IdSpace) - { - const int32_t key_IdSpace = IdSpace; - const std::string key_var_name = name; - - auto search_IdSpace = _refs.find(key_IdSpace); - - if(search_IdSpace != _refs.end()) - { - auto search_tensor_id = _refs[key_IdSpace].find(key_var_name); - - return search_tensor_id != _refs[key_IdSpace].end(); - } - else - { - return false; - } - } -private: - // This method ensures that the key is unique among different components - std::string generate_tensor_name(const std::string& name, int32_t tensor_id) - { - assert(tensor_id >= 0 ); - - return name + std::to_string(tensor_id); - } - - std::map _tensor_arguments {}; - std::map> _refs {}; - int32_t _IdSpace { -1 }; - GpuTargetLanguage _language { GpuTargetLanguage::Unknown }; // Gpu programming language -}; - -enum class OpType : int32_t -{ - Elementwise = 0x0000, - Relational = 0x1000, - Algebra = 0x2000 -}; - -inline std::string to_string(AssignmentOp op) -{ - switch(op) - { - case AssignmentOp::Decrement: - return "-="; - case AssignmentOp::Increment: - return "+="; - default: - assert(false); - return ""; - } -} - -inline std::string to_string(BinaryOp op) -{ - switch(op) - { - case BinaryOp::Add: - return "+"; - case BinaryOp::Sub: - return "-"; - case BinaryOp::Mul: - return "*"; - case BinaryOp::Div: - return "/"; - case BinaryOp::Mod: - return "%"; - case BinaryOp::Equal: - return "=="; - case BinaryOp::Less: - return "<"; - case BinaryOp::LessEqual: - return "<="; - case BinaryOp::Greater: - return ">"; - case BinaryOp::GreaterEqual: - return ">="; - case BinaryOp::LogicalAnd: - return "&&"; - case BinaryOp::LogicalOr: - return "||"; - case BinaryOp::LogicalNot: - return "!"; - default: - assert(false); - return ""; - } -} - -inline std::string binary_op_string(BinaryOp op) -{ - switch(op) - { - case BinaryOp::Add: - return "add"; - case BinaryOp::Sub: - return "sub"; - case BinaryOp::Mul: - return "mul"; - case BinaryOp::Div: - return "div"; - case BinaryOp::Mod: - return "mod"; - case BinaryOp::Equal: - return "eq"; - case BinaryOp::Less: - return "gt"; - case BinaryOp::LessEqual: - return "gteq"; - case BinaryOp::Greater: - return "lt"; - case BinaryOp::GreaterEqual: - return "lte"; - default: - assert(false); - return ""; - } -} - -enum class OperandType : int32_t -{ - Unknown = 0x00000000, - ScalarFp32 = 0x00001011, // Immediate scalar tile - ScalarFp16 = 0x00001012, // Immediate scalar tile - ScalarInt32 = 0x00001021, // Immediate scalar tile - ScalarInt16 = 0x00001022, // Immediate scalar tile - ScalarInt8 = 0x00001024, // Immediate scalar tile - ScalarUInt32 = 0x00001031, // Immediate scalar tile - ScalarUInt16 = 0x00001032, // Immediate scalar tile - ScalarUInt8 = 0x00001034, // Immediate scalar tile - ScalarBool = 0x00001041, // Immediate scalar tile - ScalarTile = 0x00001050, // Scalar from a tile - Tile = 0x00010000, // Tile - TensorStride1 = 0x00100001, // Tensor component - TensorStride2 = 0x00100002, // Tensor component - TensorStride3 = 0x00100003, // Tensor component - TensorStride4 = 0x00100004, // Tensor component - TensorDim0 = 0x00100010, // Tensor component - 
TensorDim1 = 0x00100020, // Tensor component - TensorDim2 = 0x00100030, // Tensor component - TensorDim3 = 0x00100040, // Tensor component - TensorDim4 = 0x00100050, // Tensor component - TensorC = 0x00100010, // Tensor component - TensorW = 0x00100020, // Tensor component - TensorH = 0x00100030, // Tensor component - TensorD = 0x00100040, // Tensor component - TensorN = 0x00100050, // Tensor component - TensorDim1xDim2 = 0x00100100, // Tensor component - TensorDim1xDim2xDim3 = 0x00100200, // Tensor component - TensorWxH = 0x00100300, // Tensor component - TensorWxHxD = 0x00100400, // Tensor component - TensorDataOffset = 0x00100500, // Tensor component -}; - -struct ScalarTileCoord -{ - ScalarTileCoord() {} - ScalarTileCoord(int32_t x0, int32_t y0) : x(x0), y(y0) {} - int32_t x { -1 }; - int32_t y { -1 }; -}; -/** - * @brief Operand class. This object is used to pass the operands to the operations performed by the writer. - * Operand can be of three types: - * -# Scalar immediate: constant expression - * -# Tile: A tile - * -# Tensor component: A component (scalar) of a tensor - * - */ -class Operand -{ -public: - Operand(const std::string &val) - { - _str = val; - _type = OperandType::Tile; - } - - Operand(const std::string &val, const ScalarTileCoord& coord) - { - _str = val; - _type = OperandType::ScalarTile; - _coord = coord; - } - - Operand(const std::string &val, OperandType type) - { - _str = val; - _type = type; - } - - Operand(const Operand& t) - { - _str = t.value(); - _type = t.type(); - } - - Operand& operator=(const Operand& t) - { - _str = t.value(); - _type = t.type(); - _coord = t.scalar_tile_coordinate(); - return *this; - } - - std::string value() const - { - return _str; - } - - OperandType type() const - { - return _type; - } - - ScalarTileCoord scalar_tile_coordinate() const - { - return _coord; - } - -private: - std::string _str {}; - OperandType _type { OperandType::Unknown }; - ScalarTileCoord _coord {}; -}; - -enum class GpuSamplerTensorStorage : int32_t -{ - Unknown = static_cast(GpuTensorStorage::Unknown), - BufferUint8Ptr = static_cast(GpuTensorStorage::BufferUint8Ptr), - Image2dReadOnly = static_cast(GpuTensorStorage::Image2dReadOnly), - Image2dWriteOnly = static_cast(GpuTensorStorage::Image2dWriteOnly), - Image3dReadOnly = static_cast(GpuTensorStorage::Image3dReadOnly), - Image3dWriteOnly = static_cast(GpuTensorStorage::Image2dWriteOnly), -}; - -struct GpuSampler -{ - GpuSampler() = default; - TensorSamplerFormat format { TensorSamplerFormat::Unknown }; - GpuSamplerTensorStorage storage { GpuSamplerTensorStorage::Unknown }; - TensorSamplerAddressModeX address_mode_x { TensorSamplerAddressModeX::Unknown }; - TensorSamplerAddressModeY address_mode_y { TensorSamplerAddressModeY::Unknown }; - TensorSamplerAddressModeZ address_mode_z { TensorSamplerAddressModeZ::Unknown }; -}; - -inline GpuSampler create_simple_sampler(const TensorInfo* tensor_info_id, GpuSampler sampler, int32_t step_x, int32_t step_y, int32_t step_z) -{ - CKW_UNUSED(step_x, step_y, step_z); - - auto tensor = tensor_info_id->shape; - - GpuSampler dst_sampler; - dst_sampler.format = sampler.format; - dst_sampler.storage = GpuSamplerTensorStorage::BufferUint8Ptr; - dst_sampler.address_mode_x = sampler.address_mode_x; - dst_sampler.address_mode_y = sampler.address_mode_y; - dst_sampler.address_mode_z = sampler.address_mode_z; - - int32_t dim_x = 0; - int32_t dim_y = 0; - int32_t dim_z = 0; - - switch(sampler.format) - { - case TensorSamplerFormat::C_W_H: - dim_x = tensor[0]; - dim_y = tensor[1]; - 
dim_z = tensor[2]; - break; - case TensorSamplerFormat::C_WH_1: - dim_x = tensor[0]; - dim_y = tensor[1] * tensor[2]; - dim_z = 1; - break; - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - break; - } - - if(dim_x == 1) - { - assert(step_x == 1); - dst_sampler.address_mode_x = TensorSamplerAddressModeX::None; - } - - if(dim_y == 1) - { - assert(step_y == 1); - dst_sampler.address_mode_y = TensorSamplerAddressModeY::None; - } - - if(dim_z == 1) - { - assert(step_z == 1); - dst_sampler.address_mode_z = TensorSamplerAddressModeZ::None; - } - - return dst_sampler; -} - -class GpuOutputSampler -{ -public: - GpuOutputSampler() = default; - /** - * @brief Method used to initialize the GpuOutputSampler. The GpuOutputSampler can be initialized only once - * by the root component. Once initialized, all simpler components will need to used this sampler - * or a broadcasted version of it - * - * @param[in] sampler GpuSampler - * @param[in] step_x Increment step in the X direction. Not necessarily it is the same of n0 of tile! - * @param[in] step_y Increment step in the Y direction. Not necessarily it is the same of m0 of tile! - * @param[in] step_z Increment step in the Z direction. Not necessarily it is the same of d0 of tile! - */ - void initialize(const TensorInfo *tensor_info_id, GpuSamplerTensorStorage tensor_storage, TensorSamplerFormat tensor_format, int32_t step_x, int32_t step_y, int32_t step_z) - { - assert(_is_initialized == false); - - _step_x = step_x; - _step_y = step_y; - _step_z = step_z; - _tensor_info_id = tensor_info_id; - _sampler = create_sampler(tensor_storage, tensor_format); - _is_initialized = true; - }; - - GpuSampler sampler() const - { - return _sampler; - }; - - int32_t step_x() const - { - return _step_x; - }; - - int32_t step_y() const - { - return _step_y; - }; - - int32_t step_z() const - { - return _step_z; - }; -private: - GpuSampler create_sampler(GpuSamplerTensorStorage tensor_storage, TensorSamplerFormat tensor_format) - { - // Output can only be in output mode - assert(tensor_storage != GpuSamplerTensorStorage::Image2dReadOnly); - assert(tensor_storage != GpuSamplerTensorStorage::Image3dReadOnly); - - auto tensor = _tensor_info_id->shape; - - GpuSampler sampler; - sampler.format = tensor_format; - sampler.storage = tensor_storage; - sampler.address_mode_x = TensorSamplerAddressModeX::None; - sampler.address_mode_y = TensorSamplerAddressModeY::None; - sampler.address_mode_z = TensorSamplerAddressModeZ::None; - - // In the case of texture, we do not need any special checks at the border - if(tensor_storage == GpuSamplerTensorStorage::BufferUint8Ptr) - { - int32_t dim_x = 0; - int32_t dim_y = 0; - int32_t dim_z = 0; - - switch(tensor_format) - { - case TensorSamplerFormat::C_W_H: - dim_x = tensor[0]; - dim_y = tensor[1]; - dim_z = tensor[2]; - break; - case TensorSamplerFormat::C_WH_1: - dim_x = tensor[0]; - dim_y = tensor[1] * tensor[2]; - dim_z = 1; - break; - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - break; - } - - if((dim_x % _step_x) != 0 && dim_x != 1) - { - sampler.address_mode_x = TensorSamplerAddressModeX::OverlappingMin; - } - - if((dim_y % _step_y) != 0 && dim_y != 1) - { - sampler.address_mode_y = TensorSamplerAddressModeY::ClampToMaxEdgeOnly; - } - - if((dim_z % _step_z) != 0 && dim_z != 1) - { - sampler.address_mode_z = TensorSamplerAddressModeZ::ClampToMaxEdgeOnly; - } - } - - return sampler; - } - GpuSampler _sampler { }; // GpuSampler - int32_t _step_x { 1 }; - int32_t 
_step_y { 1 }; - int32_t _step_z { 1 }; - const TensorInfo* _tensor_info_id { nullptr }; - bool _is_initialized { false }; -}; - -/** - * @brief Tensor operand class. This object is used to pass the operands as tensor to the operations performed by the writer. - */ -class TensorOperand -{ -public: - TensorOperand(const std::string &val, GpuSampler sampler) : _str(val), _sampler(sampler) - { - } - - TensorOperand& operator=(const TensorOperand& t) - { - _str = t.value(); - _sampler = t.sampler(); - return *this; - } - - std::string value() const - { - return _str; - } - - GpuSampler sampler() const - { - return _sampler; - } - -private: - std::string _str {}; - GpuSampler _sampler {}; -}; - -/** - * @brief Data structure that contains all the necessary information to write the Gpu kernel with the Gpu kernel Writer - * This data structure must be initialized before being passed to the Gpu Kernel Writer - * - */ -class GpuKernelWriterDataHolder -{ -public: - /** - * @brief Construct a new Gpu Kernel Data object. In this phase, we should also store - * the GPU target and target specific capabilities (extensions). For now, we just initialize the - * programming language - * - * @param[in] language Gpu programming language to use - */ - GpuKernelWriterDataHolder(GpuTargetLanguage language) : tiles(language), arguments(language), code(""), _language(language) - { - } - /** - * @brief Get the Gpu programming language used - * - * @return GpuTargetLanguage the Gpu programming language - */ - GpuTargetLanguage programming_language() const - { - return _language; - } - /** - * @brief @ref GpuTileRegistry - * - */ - GpuTileRegistry tiles{}; - /** - * @brief @ref GpuTensorArgumentRegistry - * - */ - GpuTensorArgumentRegistry arguments{}; - /** - * @brief @ref GpuOutputSampler. - * - */ - GpuOutputSampler output_sampler{}; - /** - * @brief Source code - * - */ - std::string code{}; - - // GpuExtensionRegistry extensions{}; -private: - GpuTargetLanguage _language; -}; - -struct LWS -{ - int32_t x {1}; - int32_t y {1}; - int32_t z {1}; -}; - -/** - * @brief Utility class used to get the tile from the operand. If the operand is not a tile, @ref OperandUnpacker - * declare an anonymous tile in the tile registry. 
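A standalone sketch of the address-mode rule applied by GpuOutputSampler::create_sampler() above for buffer storage: a dimension only needs out-of-bound handling when the iteration step does not divide it exactly, and a dimension of size 1 needs none; X then overlaps towards the start while Y and Z clamp to the max edge. The enum and function below are hypothetical stand-ins for the decision only.

    #include <cstdint>
    #include <iostream>

    enum class AddressMode { None, OverlappingMin, ClampToMaxEdgeOnly };

    // Mirrors the checks in GpuOutputSampler::create_sampler().
    AddressMode pick_address_mode(int32_t dim, int32_t step, bool is_x)
    {
        if(dim == 1 || (dim % step) == 0)
        {
            return AddressMode::None; // exact fit or broadcast dimension: no boundary check
        }
        return is_x ? AddressMode::OverlappingMin : AddressMode::ClampToMaxEdgeOnly;
    }

    int main()
    {
        // A 17-element X dimension written with step_x = 4 needs overlapping accesses...
        std::cout << (pick_address_mode(17, 4, true) == AddressMode::OverlappingMin) << "\n"; // 1
        // ...while an 8-element Y dimension with step_y = 4 divides exactly and needs no check.
        std::cout << (pick_address_mode(8, 4, false) == AddressMode::None) << "\n";           // 1
    }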
- */ -class OperandUnpacker -{ -public: - OperandUnpacker(GpuTileRegistry& tiles, GpuTensorArgumentRegistry& arguments) : _tiles(tiles), _arguments(arguments) - { - // Increase the level of the stack to allocate possible temporary tiles - _tiles.increment_registry_level(); - }; - - ~OperandUnpacker() - { - // Decrease the level of the stack to deallocate any temporary tiles - _tiles.decrement_registry_level(); - } - - IVectorTile* unpack(const Operand& src) - { - // Get the tile - if(src.type() == OperandType::Tile) - { - assert(_tiles.has_tile(src.value())); - return _tiles[src.value()]; - } - // Create an anonymous tile with a constant - else if(static_cast(src.type()) & 0x00001000) - { - if(src.type() == OperandType::ScalarTile) - { - ScalarTileCoord coord = src.scalar_tile_coordinate(); - assert(_tiles.has_tile(src.value())); - assert(coord.x >= 0); - assert(coord.y >= 0); - auto val = _tiles[src.value()]->scalar(coord.x, coord.y); - return _tiles.insert({{{val.str}}}, val.type.dt); - } - else - { - return _tiles.insert({{{src.value()}}}, to_tile_data_type(src.type())); - } - } - // Create an anonymous tile with the tensor component - else - { - assert(_arguments.has_tensor_argument(src.value())); - auto x = _arguments[src.value()]; - const std::string val = x->component(to_tensor_component(src.type())); - const DataType dt = x->component_data_type(); - return _tiles.insert({{{val}}}, dt); - } - } - -private: - DataType to_tile_data_type(OperandType x) - { - return static_cast(static_cast(x) & 0x00ff); - } - - TensorComponent to_tensor_component(OperandType x) - { - switch(x) - { - case OperandType::TensorDim0: - return TensorComponent::Dim0; - case OperandType::TensorDim1: - return TensorComponent::Dim1; - case OperandType::TensorDim2: - return TensorComponent::Dim2; - case OperandType::TensorDim3: - return TensorComponent::Dim3; - case OperandType::TensorDim4: - return TensorComponent::Dim4; - case OperandType::TensorStride1: - return TensorComponent::Stride1; - case OperandType::TensorStride2: - return TensorComponent::Stride2; - case OperandType::TensorStride3: - return TensorComponent::Stride3; - case OperandType::TensorStride4: - return TensorComponent::Stride4; - case OperandType::TensorDim1xDim2: - return TensorComponent::Dim1xDim2; - case OperandType::TensorDim1xDim2xDim3: - return TensorComponent::Dim1xDim2xDim3; - case OperandType::TensorDataOffset: - return TensorComponent::OffsetFirstElement; - default: - assert(false); - return TensorComponent::Unknown; - } - } - - GpuTileRegistry& _tiles; - GpuTensorArgumentRegistry& _arguments; -}; - -/** - * @brief Utility class used to get the tensor argument from the operand. If the operand is not a tile, @ref OperandUnpacker - * declare an anonymous tile in the tile registry. - * Tensor dimension reduction aims for reducing the tensor data dimension while keeping data's tensor structure. 
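A short usage sketch of the operand kinds defined above and how OperandUnpacker resolves each of them (usage only; it assumes the declarations in this file are in scope and that a tile "acc" and a tensor "src" were previously declared in the two registries; note that for immediates the low byte of OperandType doubles as the DataType, which is what to_tile_data_type() extracts).

    void operand_unpacker_sketch(GpuTileRegistry &tiles, GpuTensorArgumentRegistry &args)
    {
        OperandUnpacker unpacker(tiles, args); // opens a temporary registry level for anonymous tiles

        Operand tile_op("acc");                          // an already-declared tile
        Operand imm_op("0.5f", OperandType::ScalarFp32); // immediate -> anonymous constant tile
        Operand coord_op("acc", ScalarTileCoord(0, 1));  // one scalar lane taken from a tile
        Operand dim_op("src", OperandType::TensorDim1);  // tensor component -> src_dim1 argument (or its literal value)

        IVectorTile *t0 = unpacker.unpack(tile_op);
        IVectorTile *t1 = unpacker.unpack(imm_op);
        IVectorTile *t2 = unpacker.unpack(coord_op);
        IVectorTile *t3 = unpacker.unpack(dim_op);
        (void)t0; (void)t1; (void)t2; (void)t3;

        // The anonymous tiles created for imm_op, coord_op and dim_op are popped again
        // when the OperandUnpacker goes out of scope.
    }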
- */ -class TensorOperandUnpacker -{ -public: - TensorOperandUnpacker(GpuTensorArgumentRegistry& arguments) : _arguments(arguments) - { - }; - - IGpuTensorArgument* unpack(const TensorOperand& src) - { - assert(_arguments.has_tensor_argument(src.value())); - return _arguments[src.value()]; - } - -private: - GpuTensorArgumentRegistry& _arguments; -}; - -/** - * @brief The GpuKernel will be used in three occasions (stages): - * #- Compilation stage - * #- Tuning stage - * #- Dispatch stage - */ -struct GpuKernel -{ - // Compilation stage - std::string code {}; // Source code, required for the compilation stage - std::vector list_extensions{}; // Extensions, required for the compilation stage - // Tuning stage - std::string config_id {}; // Unique id, required for the tuning stage - std::vector list_lws{}; // LWS to test, required for the tuning stage - // Dispatch stage - GpuOutputSampler output_sampler{}; // GpuOutputSampler, required for the dispatch stage - std::vector> list_tensor_storages; // List of tensor storages, required for the dispatch stage - std::vector> list_tensor_components;// List of tensor components (width, stride,..), required for the dispatch stage) -}; - -// This function should produce an object with the source -inline std::string generate_code(GpuKernelWriterDataHolder &in, const std::string& name) -{ - std::string code; - code += "__kernel void "; - code += name; - code += "(\n"; - - auto IdSpaces = in.arguments.IdSpace_declarations(); - - std::vector arg_str; - - auto tensor_args = in.arguments.tensor_argument_declarations(); - - for(auto &i : tensor_args) - { - // For each tensor used, get the storage and tensor components - auto storages = i->storage_declarations(); - auto components = i->component_declarations(); - - for(auto &y : storages) - { - std::string str; - str += i->storage_type_declaration(y); - str += " "; - str += i->storage(y); - arg_str.push_back(str); - } - - for(auto &y : components) - { - std::string str; - str += i->component_type_declaration(); - str += " "; - str += i->component(y); - arg_str.push_back(str); - } - } - - for(size_t i = 0; i < arg_str.size(); ++i) - { - code += arg_str[i]; - if(i + 1 < arg_str.size()) - { - code += ",\n"; - } - } - - code += ")\n"; - code += "{\n"; - code += in.code; - code += "}\n"; - - return code; -} - -/** - * @brief This class is responsible to map a N-Tensor to a 3d tensor. 
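For orientation, generate_code() above emits roughly the following OpenCL source for a kernel that consumed a single buffer-backed tensor src through its pointer, stride1 and dim0 (illustrative output only; the real argument list contains exactly the storages and components that were queried while the body was written):

    __kernel void my_fused_kernel(
    __global uchar* src_ptr,
    int src_stride1,
    int src_dim0)
    {
    /* body accumulated in GpuKernelWriterDataHolder::code */
    }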
The mapper needs the GpuSampler to know - * how to reduce the dimensionality of a tensor - * - */ -class GpuTensor3dMapper -{ -public: - GpuTensor3dMapper(IGpuTensorArgument* tensor, GpuSampler sampler) : _sampler(sampler), _tensor(tensor) - { - }; - - std::string tensor_component_x() const - { - const auto format = _sampler.format; - switch(format) - { - case TensorSamplerFormat::C_WH_1: - case TensorSamplerFormat::C_W_H: - return _tensor->component(TensorComponent::C); - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - return ""; - } - } - - std::string tensor_component_y() const - { - const auto format = _sampler.format; - switch(format) - { - case TensorSamplerFormat::C_WH_1: - return _tensor->component(TensorComponent::WxH); - case TensorSamplerFormat::C_W_H: - return _tensor->component(TensorComponent::W); - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - return ""; - } - } - - std::string tensor_component_z() const - { - const auto format = _sampler.format; - switch(format) - { - case TensorSamplerFormat::C_WH_1: - return "1"; - case TensorSamplerFormat::C_W_H: - return _tensor->component(TensorComponent::H); - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - return ""; - } - } - - std::string tensor_component_stride_y() const - { - const auto format = _sampler.format; - switch(format) - { - case TensorSamplerFormat::C_WH_1: - case TensorSamplerFormat::C_W_H: - return _tensor->component(TensorComponent::Stride1); - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - return ""; - } - } - - std::string tensor_component_stride_z() const - { - const auto format = _sampler.format; - switch(format) - { - case TensorSamplerFormat::C_WH_1: - return "0"; - case TensorSamplerFormat::C_W_H: - return _tensor->component(TensorComponent::Stride2); - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - return ""; - } - } - - std::string tensor_component_stride_batch() const - { - const auto format = _sampler.format; - switch(format) - { - case TensorSamplerFormat::C_WH_1: - case TensorSamplerFormat::C_W_H: - return _tensor->component(TensorComponent::Stride3); - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - return ""; - } - } - - bool is_one_component_x() const - { - auto t = _tensor->format(); - const auto format = _sampler.format; - switch(format) - { - case TensorSamplerFormat::C_WH_1: - case TensorSamplerFormat::C_W_H: - return t.shape[0] == 1; - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - return ""; - } - } - - bool is_one_component_y() const - { - auto t = _tensor->format(); - const auto format = _sampler.format; - switch(format) - { - case TensorSamplerFormat::C_WH_1: - return (t.shape[1] * t.shape[2]) == 1; - case TensorSamplerFormat::C_W_H: - return t.shape[1] == 1; - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - return ""; - } - } - - bool is_one_component_z() const - { - auto t = _tensor->format(); - const auto format = _sampler.format; - switch(format) - { - case TensorSamplerFormat::C_WH_1: - return true; - case TensorSamplerFormat::C_W_H: - return t.shape[2] == 1; - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - return ""; - } - } - - bool is_one_component_batch() const - { - auto t = _tensor->format(); - const auto format = _sampler.format; - switch(format) 
- { - case TensorSamplerFormat::C_WH_1: - case TensorSamplerFormat::C_W_H: - return t.shape[3] == 1; - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - return ""; - } - } - - GpuSampler gpu_sampler() const - { - return _sampler; - } - - IGpuTensorArgument* tensor_argument() const - { - return _tensor; - } - -private: - GpuSampler _sampler; - IGpuTensorArgument* _tensor; -}; - -struct GpuKernelWriterAttribute -{ - bool return_tensor_component_by_value { false }; -}; - -enum class ConvertPolicy -{ - Wrap, /**< Wrap around */ - Saturate /**< Saturate */ -}; - -enum class RoundingMode -{ - None, - Rte, - Rtz, - Rtp, - Rtn -}; - -// https://llvm.org/docs/tutorial/MyFirstLanguageFrontend/LangImpl05.html -class IGpuKernelWriter -{ -public: - virtual ~IGpuKernelWriter() = default; - virtual void set_IdSpace(int32_t id) = 0; - virtual void import_tile(const std::string& dst, const IVectorTile *src) = 0; - virtual void declare_argument(const std::string& name, const TensorInfo& tensor) = 0; - virtual void declare_tile(const std::string& name, const TileInfo& info) = 0; - virtual void declare_const_tile(const std::string& name, const std::vector>& in, DataType dt) = 0; - virtual void write_text(const std::string& x) = 0; - virtual void compound_statement_begin() = 0; - virtual void compound_statement_end() = 0; - - // Operations - virtual void op_get_global_id(const Operand& dst_var, int32_t dim) = 0; - virtual void op_get_global_coord(const Operand& dst, const Operand& step, const TensorOperand& tensor, int32_t dim) = 0; - virtual void op_get_global_batch(const Operand& dst, const TensorOperand& tensor) = 0; - virtual void op_get_global_size(const Operand& dst_var, int32_t dim) = 0; - virtual void op_binary_expression(const Operand& dst, const Operand &lhs, BinaryOp op, const Operand &rhs) = 0; - virtual void op_assign(const Operand& dst_name, const Operand& src_name) = 0; - virtual void op_scalar_function(const Operand& dst_name, const Operand& src_name, ScalarUnaryFunction func) = 0; - virtual void op_if(const Operand& lhs, BinaryOp op, const Operand& rhs) = 0; - virtual void op_for_loop(const Operand& var_name, BinaryOp cond_op, const Operand& cond_value, AssignmentOp update_op, const Operand& update_value) = 0; - virtual void op_load_indirect(const TensorOperand& tensor, const Operand& dst, const Operand& x, const Operand& y_indirect, const Operand& z, const Operand& b = Operand("0", OperandType::ScalarInt32)) = 0; - virtual void op_load_immediate(const TensorOperand& tensor, const Operand& dst, const Operand& x, const Operand& y, const Operand& z, const Operand& b = Operand("0", OperandType::ScalarInt32), const Operand& dilation_y = Operand("1", OperandType::ScalarInt32)) = 0; - virtual void op_store_immediate(const TensorOperand& tensor, const Operand& src, const Operand& x, const Operand& y, const Operand& z, const Operand& b = Operand("0", OperandType::ScalarInt32)) = 0; - virtual void op_cast_expression(const Operand& dst, const Operand &src, ConvertPolicy policy) = 0; - virtual void op_return() = 0; - // virtual void op_else() = 0; - // virtual void op_elseif() = 0; - // Utils - // It is the process of converting - virtual void util_get_indirect_buffer(const Operand& dst, const TensorOperand& tensor, const Operand& x, const Operand& y, const Operand& x_off, const Operand& y_off) = 0; -}; - -enum class GpuLoadStoreType -{ - Load = 1, - Store = 2 -}; - -class IGpuLoadStoreHelperWriter -{ -public: - IGpuLoadStoreHelperWriter(IGpuKernelWriter *x, 
GpuTensor3dMapper mapper, GpuLoadStoreType type) : _writer(x), _mapper(mapper), _type(type) {} - IGpuLoadStoreHelperWriter(const IGpuLoadStoreHelperWriter &) = default; - IGpuLoadStoreHelperWriter &operator=(const IGpuLoadStoreHelperWriter &) = default; - virtual ~IGpuLoadStoreHelperWriter() = default; - virtual void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) = 0; - virtual void write(const std::pair& y) = 0; - virtual void finalize() = 0; -protected: - IGpuKernelWriter* _writer; - GpuTensor3dMapper _mapper; - GpuLoadStoreType _type; -}; - -class ClLoadStoreBufferHelperWriter : public IGpuLoadStoreHelperWriter -{ -public: - ClLoadStoreBufferHelperWriter(IGpuKernelWriter *x, const GpuTensor3dMapper& mapper, GpuLoadStoreType type) : IGpuLoadStoreHelperWriter(x, mapper, type) - { - } - - ClLoadStoreBufferHelperWriter(const ClLoadStoreBufferHelperWriter &) = default; - ClLoadStoreBufferHelperWriter &operator=(const ClLoadStoreBufferHelperWriter &) = default; - - static bool validate(IGpuKernelWriter *x, GpuTensor3dMapper mapper, GpuLoadStoreType type, IVectorTile *dst) - { - CKW_UNUSED(x, type, dst); - - if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::BufferUint8Ptr) - { - return false; - } - return true; - } - - void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) override - { - assert(validate(_writer, _mapper, _type, dst)); - - _dst = dst; - _ls_width_full = dst->format().w; - - _coord_x = x->scalar(0, 0).str; - _coord_z = z->scalar(0, 0).str; - _coord_b = b->scalar(0, 0).str; - _coord_orig_z = _coord_z; - - out_of_bound_initialize_x(_coord_x); - out_of_bound_initialize_z(_coord_z); - - /* - meaning of else: - - x: partial load/store - - y: no load/store operation - - z: no load/store operation - if(x) - { - if(z) - { - if(y) - { - // full load/store width - } - else - { - // no load/store - } - } - else - { - // no load/store - } - } - else - { - if(z) - { - if(y) - { - // partial load/store width - } - else - { - // no load/store - } - } - else - { - // no load/store - } - } - */ - } - - void write(const std::pair& y) override - { - int32_t idx_y = y.first; - std::string coord_y = y.second; - - // The only check required is on Y. 
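To make the emitted control flow concrete: for a buffer-backed float tensor src with TensorSamplerAddressModeY::ClampToBorder, the guard opened by out_of_bound_initialize_y(), the vload produced by to_ls_buffer()/to_ls_buffer_address() and the zero-fill branch added by out_of_bound_finalize_y() combine to roughly the following OpenCL (the destination row acc_0 and the coordinate names are assumed):

    if((y >= 0) && (y < src_dim1))
    {
    acc_0 = vload4(0, (__global float*)(src_ptr + (x) * sizeof(float) + (y) * src_stride1 + (z) * src_stride2));
    }
    else
    {
    acc_0 = 0.0f;
    }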
- out_of_bound_initialize_y(coord_y); - - const std::string dst = _dst->vector(idx_y).str; - const std::string address = to_ls_buffer_address(_coord_x, coord_y, _coord_z, _coord_b); - const std::string ls_buf = to_ls_buffer(_type, _ls_width_full, dst, address); - - _writer->write_text(ls_buf); - _writer->write_text(";\n"); - - out_of_bound_finalize_y(dst); - - // The left over load/store will be written in the finalize stage - if(_ls_width_part.size() != 0) - { - int32_t w = 0; - for(auto &p : _ls_width_part) - { - const std::string dst0 = _dst->vector(w, p, idx_y).str; - const std::string coord_x = _coord_x + " + " + std::to_string(w); - const std::string address = to_ls_buffer_address(coord_x, coord_y, _coord_z, _coord_b); - const std::string ls_buf0 = to_ls_buffer(_type, p, dst0, address); - _leftovers_x.push_back(std::make_pair(std::make_pair(dst0, coord_y), ls_buf0)); - - w += p; - } - } - } - - void finalize() override - { - out_of_bound_finalize_z(); - out_of_bound_finalize_x(); - } -private: - IVectorTile* _dst { nullptr }; - int32_t _ls_width_full { 0 }; - std::vector _ls_width_part { }; - std::vector, std::string>> _leftovers_x {}; - std::string _coord_x {}; - std::string _coord_z {}; - std::string _coord_orig_z {}; - std::string _coord_b {}; - - void out_of_bound_initialize_x(std::string& coord) - { - if(_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin) - { - auto tensor_format = _mapper.tensor_argument()->format(); - auto shape = tensor_format.shape; - - _ls_width_part = decompose_leftover_ls_vector_width(shape[0] % _ls_width_full); - if(_ls_width_part.size() != 0) - { - _writer->write_text("if(" + coord + " > 0)\n"); - _writer->compound_statement_begin(); - } - } - }; - - void out_of_bound_finalize_x() - { - if(_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin) - { - if(_ls_width_part.size() != 0) - { - _writer->compound_statement_end(); - _writer->write_text("else\n"); - _writer->compound_statement_begin(); - - out_of_bound_initialize_z(_coord_orig_z); - for(auto &i : _leftovers_x) - { - out_of_bound_initialize_y(i.first.second); - _writer->write_text(i.second); - _writer->write_text(";\n"); - out_of_bound_finalize_y(i.first.first); - } - out_of_bound_finalize_z(); - _writer->compound_statement_end(); - } - } - }; - - void out_of_bound_initialize_y(std::string& coord) - { - std::string max = ""; - - const auto address_mode_y = _mapper.gpu_sampler().address_mode_y; - - switch(address_mode_y) - { - case TensorSamplerAddressModeY::Skip: - case TensorSamplerAddressModeY::ClampToBorder: - // NOTE: This line should not be moved outside of the switch statement. - // The reason for that is because when we query the component, the component is marked as used - // and added to the list of arguments of the kernel. 
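When OverlappingMin is active on X, the leftover columns are re-issued with the widest directly supported vector widths. A standalone sketch of that splitting rule (equivalent to the table in decompose_leftover_ls_vector_width() further down; the function name here is hypothetical):

    #include <cstdint>
    #include <iostream>
    #include <vector>

    // Split a partial width into widths OpenCL can vload/vstore directly:
    // widest first (16, 8, 4), then the remaining 1..3 elements as one narrow access.
    std::vector<int32_t> split_leftover_width(int32_t leftover)
    {
        std::vector<int32_t> parts;
        for(int32_t w : {16, 8, 4})
        {
            while(leftover >= w)
            {
                parts.push_back(w);
                leftover -= w;
            }
        }
        if(leftover > 0)
        {
            parts.push_back(leftover);
        }
        return parts;
    }

    int main()
    {
        for(int32_t p : split_leftover_width(13))
        {
            std::cout << p << ' '; // prints: 8 4 1
        }
        std::cout << '\n';
    }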
Since, not in all cases this component is required, - // we should request the component only when used - max = _mapper.tensor_component_y(); - _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n"); - _writer->compound_statement_begin(); - break; - case TensorSamplerAddressModeY::SkipMinEdgeOnly: - case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly: - _writer->write_text("if(" + coord + " >= 0)\n"); - _writer->compound_statement_begin(); - break; - case TensorSamplerAddressModeY::SkipMaxEdgeOnly: - case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly: - max = _mapper.tensor_component_y(); - _writer->write_text("if(" + coord + " < " + max + ")\n"); - _writer->compound_statement_begin(); - break; - case TensorSamplerAddressModeY::ClampToNearest: - max = _mapper.tensor_component_y(); - coord = "clamp(" + coord + ", 0, " + max + " - 1)"; - break; - case TensorSamplerAddressModeY::ClampToMaxEdgeOnly: - max = _mapper.tensor_component_y(); - coord = "min(" + coord + ", " + max + " - 1)"; - break; - case TensorSamplerAddressModeY::ClampToMinEdgeOnly: - coord = "max(" + coord + ", 0)"; - break; - case TensorSamplerAddressModeY::None: - break; - default: - std::cout << "Unsupported address mode for write_out_of_bound_check_yz" << std::endl; - assert(false); - } - }; - - void out_of_bound_finalize_y(const std::string& dst) - { - const auto address_mode_y = _mapper.gpu_sampler().address_mode_y; - - switch(address_mode_y) - { - case TensorSamplerAddressModeY::ClampToBorder: - case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly: - case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly: - case TensorSamplerAddressModeY::Skip: - case TensorSamplerAddressModeY::SkipMaxEdgeOnly: - case TensorSamplerAddressModeY::SkipMinEdgeOnly: - _writer->compound_statement_end(); - break; - - default: - assert(false); - } - - switch(address_mode_y) - { - case TensorSamplerAddressModeY::ClampToBorder: - case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly: - case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly: - _writer->write_text("else\n"); - _writer->compound_statement_begin(); - _writer->write_text(dst); - _writer->write_text(" = 0.0f;\n"); - _writer->compound_statement_end(); - break; - - default: - assert(false); - } - }; - - void out_of_bound_initialize_z(std::string& coord) - { - std::string max = ""; - - const auto address_mode_z = _mapper.gpu_sampler().address_mode_z; - - switch(address_mode_z) - { - case TensorSamplerAddressModeZ::Skip: - max = _mapper.tensor_component_z(); - _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n"); - _writer->compound_statement_begin(); - break; - case TensorSamplerAddressModeZ::SkipMinEdgeOnly: - _writer->write_text("if(" + coord + " >= 0)\n"); - _writer->compound_statement_begin(); - break; - case TensorSamplerAddressModeZ::SkipMaxEdgeOnly: - max = _mapper.tensor_component_z(); - _writer->write_text("if(" + coord + " < " + max + ")\n"); - _writer->compound_statement_begin(); - break; - case TensorSamplerAddressModeZ::ClampToNearest: - max = _mapper.tensor_component_z(); - coord = "clamp(" + coord + ", 0, " + max + " - 1)"; - break; - case TensorSamplerAddressModeZ::ClampToMaxEdgeOnly: - max = _mapper.tensor_component_z(); - coord = "min(" + coord + ", " + max + " - 1)"; - break; - case TensorSamplerAddressModeZ::ClampToMinEdgeOnly: - coord = "max(" + coord + ", 0)"; - break; - case TensorSamplerAddressModeZ::None: - break; - default: - std::cout << "Unsupported address mode for 
write_out_of_bound_check_yz" << std::endl; - assert(false); - } - }; - - void out_of_bound_finalize_z() - { - const auto address_mode_z = _mapper.gpu_sampler().address_mode_z; - - switch(address_mode_z) - { - case TensorSamplerAddressModeZ::Skip: - case TensorSamplerAddressModeZ::SkipMinEdgeOnly: - case TensorSamplerAddressModeZ::SkipMaxEdgeOnly: - _writer->compound_statement_end(); - break; - - default: - assert(false); - } - }; - - std::vector decompose_leftover_ls_vector_width(int32_t ls_leftover_vector_width) const - { - std::vector x; - - switch(ls_leftover_vector_width) - { - case 0: - break; - case 1: - case 2: - case 3: - case 4: - case 8: - case 16: - x.push_back(ls_leftover_vector_width); - break; - case 5: - x.push_back(4); - x.push_back(1); - break; - case 6: - x.push_back(4); - x.push_back(2); - break; - case 7: - x.push_back(4); - x.push_back(3); - break; - case 9: - x.push_back(8); - x.push_back(1); - break; - case 10: - x.push_back(8); - x.push_back(2); - break; - case 11: - x.push_back(8); - x.push_back(3); - break; - case 12: - x.push_back(8); - x.push_back(4); - break; - case 13: - x.push_back(8); - x.push_back(4); - x.push_back(1); - break; - case 14: - x.push_back(8); - x.push_back(4); - x.push_back(2); - break; - case 15: - x.push_back(8); - x.push_back(4); - x.push_back(3); - break; - - default: - assert(false); - } - return x; - } - - std::string to_ls_buffer(GpuLoadStoreType type, int32_t vector_width, const std::string& data, const std::string& address) - { - switch(type) - { - case GpuLoadStoreType::Load: - if(vector_width != 1) - { - return data + " = vload" + std::to_string(vector_width) + "(0, " + address + ")"; - } - else - { - return data + " = *(" + address + ")"; - } - break; - case GpuLoadStoreType::Store: - if(vector_width != 1) - { - return "vstore" + std::to_string(vector_width) + "(" + data + ", 0, " + address + ")"; - } - else - { - return "*(" + address + ") = " + data; - } - break; - default: - std::cout << "Unsupported GpuLoadStoreType" << std::endl; - assert(false); - return ""; - } - } - - std::string to_ls_buffer_address(const std::string& x, const std::string& y, const std::string& z, const std::string& b) const - { - auto tensor_storage = static_cast(_mapper.gpu_sampler().storage); - assert(tensor_storage == GpuTensorStorage::BufferUint8Ptr); - const std::string ptr_buf = _mapper.tensor_argument()->storage(tensor_storage); - const std::string dst_type = get_cl_data_type(_dst->format().dt, 1); - - std::string address; - address += "(__global "; - address += dst_type; - address += "*)("; - address += ptr_buf; - if(x != "0" && (_mapper.is_one_component_x() != true)) - { - address += " + ("; - address += x + ") * sizeof(" + dst_type + ")"; - } - if(y != "0" && (_mapper.is_one_component_y() != true)) - { - const std::string stride_y = _mapper.tensor_component_stride_y(); - address += " + ("; - address += y + ")"; - address += " * "; - address += stride_y; - } - if(z != "0" && (_mapper.is_one_component_z() != true)) - { - const std::string stride_z = _mapper.tensor_component_stride_z(); - address += " + ("; - address += z + ")"; - address += " * "; - address += stride_z; - } - if(b != "0" && (_mapper.is_one_component_batch() != true)) - { - const std::string stride_b = _mapper.tensor_component_stride_batch(); - address += " + ("; - address += b + ")"; - address += " * "; - address += stride_b; - } - address += ")"; - return address; - } -}; - -class ClLoadStoreImage2dHelperWriter : public IGpuLoadStoreHelperWriter -{ -public: - static bool 
validate(IGpuKernelWriter *x, const GpuTensor3dMapper& mapper, GpuLoadStoreType type, IVectorTile *dst) - { - CKW_UNUSED(x); - - if(dst->format().w != 4) - { - return false; - } - if(mapper.gpu_sampler().address_mode_x != TensorSamplerAddressModeX::None) - { - return false; - } - if(mapper.gpu_sampler().address_mode_z != TensorSamplerAddressModeZ::None) - { - return false; - } - if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dReadOnly && type == GpuLoadStoreType::Load) - { - return false; - } - if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dWriteOnly && type == GpuLoadStoreType::Store) - { - return false; - } - if((dst->format().dt != DataType::Fp32) && (dst->format().dt != DataType::Fp16)) - { - return false; - } - return true; - /* - - x: Only GpuSamplerAddressModeX::None is supported and vector length = 4 - - z: Only GpuSamplerAddressModeZ::None is supported - */ - } - ClLoadStoreImage2dHelperWriter(IGpuKernelWriter *x, const GpuTensor3dMapper& mapper, GpuLoadStoreType type) : IGpuLoadStoreHelperWriter(x, mapper, type) - { - } - - ClLoadStoreImage2dHelperWriter(const ClLoadStoreImage2dHelperWriter &) = default; - ClLoadStoreImage2dHelperWriter &operator=(const ClLoadStoreImage2dHelperWriter &) = default; - - void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) override - { - assert(validate(_writer, _mapper, _type, dst)); - - _dst = dst; - _ls_width_full = dst->format().w; - _coord_x = x->scalar(0, 0).str; - _coord_z = z->scalar(0, 0).str; - _coord_b = b->scalar(0, 0).str; - - /* - if(y) - { - // full load/store width - } - else - { - // no load/store - } - */ - } - - void write(const std::pair& y) override - { - int32_t idx_y = y.first; - std::string coord_y = y.second; - - // The only check required is on Y. 
- out_of_bound_initialize_y(coord_y); - - const std::string dst = _dst->vector(idx_y).str; - const std::string sampler = to_ls_image2d_sampler(); - const std::string coord = to_ls_image2d_coord(_coord_x, coord_y, _coord_z, _coord_b); - const std::string ls_buf = to_ls_image2d(_type, _ls_width_full, dst, sampler, coord); - - _writer->write_text(ls_buf); - _writer->write_text(";\n"); - - out_of_bound_finalize_y(dst); - } - - void finalize() override - { - } -private: - IVectorTile* _dst { nullptr }; - int32_t _ls_width_full { 0 }; - std::string _coord_x {}; - std::string _coord_z {}; - std::string _coord_b {}; - - void out_of_bound_initialize_y(std::string& coord) - { - std::string max = ""; - - const auto address_mode_y = _mapper.gpu_sampler().address_mode_y; - - switch(address_mode_y) - { - case TensorSamplerAddressModeY::Skip: - max = _mapper.tensor_component_y(); - _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n"); - _writer->compound_statement_begin(); - break; - case TensorSamplerAddressModeY::SkipMinEdgeOnly: - _writer->write_text("if(" + coord + " >= 0)\n"); - _writer->compound_statement_begin(); - break; - case TensorSamplerAddressModeY::SkipMaxEdgeOnly: - max = _mapper.tensor_component_y(); - _writer->write_text("if(" + coord + " < " + max + ")\n"); - _writer->compound_statement_begin(); - break; - case TensorSamplerAddressModeY::ClampToBorder: - case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly: - case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly: - case TensorSamplerAddressModeY::ClampToNearest: - case TensorSamplerAddressModeY::ClampToMaxEdgeOnly: - case TensorSamplerAddressModeY::ClampToMinEdgeOnly: - case TensorSamplerAddressModeY::None: - break; - default: - std::cout << "Unsupported address mode for write_out_of_bound_check_y" << std::endl; - assert(false); - } - }; - - void out_of_bound_finalize_y(const std::string& dst) - { - CKW_UNUSED(dst); - - const auto address_mode_y = _mapper.gpu_sampler().address_mode_y; - - switch(address_mode_y) - { - case TensorSamplerAddressModeY::Skip: - case TensorSamplerAddressModeY::SkipMinEdgeOnly: - case TensorSamplerAddressModeY::SkipMaxEdgeOnly: - _writer->compound_statement_end(); - break; - - default: - assert(false); - } - }; - - std::string to_ls_image2d(GpuLoadStoreType type, int32_t vector_width, const std::string& data, const std::string& sampler, const std::string& coord) - { - CKW_UNUSED(vector_width); - - auto tensor_storage = static_cast(_mapper.gpu_sampler().storage); - const std::string image2d_obj = _mapper.tensor_argument()->storage(tensor_storage); - // const DataType dt = _dst->format().dt; - const std::string post_fix = _dst->format().dt == DataType::Fp32? 
"f" : "h"; - - switch(type) - { - case GpuLoadStoreType::Load: - return data + " = read_image" + post_fix + "(" + image2d_obj + ", " + sampler + ", " + coord + ")"; - break; - case GpuLoadStoreType::Store: - return "write_image" + post_fix + "(" + image2d_obj + ", " + coord + ", " + data + ")"; - default: - assert(false); - std::cout << "Unsupported GpuLoadStoreType" << std::endl; - assert(false); - return ""; - } - } - - std::string to_ls_image2d_sampler() const - { - const auto address_mode_y = _mapper.gpu_sampler().address_mode_y; - - switch(address_mode_y) - { - case TensorSamplerAddressModeY::None: - return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST"; - case TensorSamplerAddressModeY::Skip: - case TensorSamplerAddressModeY::SkipMinEdgeOnly: - case TensorSamplerAddressModeY::SkipMaxEdgeOnly: - case TensorSamplerAddressModeY::ClampToBorder: - case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly: - case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly: - return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST"; - case TensorSamplerAddressModeY::ClampToNearest: - case TensorSamplerAddressModeY::ClampToMaxEdgeOnly: - case TensorSamplerAddressModeY::ClampToMinEdgeOnly: - return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST"; - default: - std::cout << "Unsupported address_mode_coord" << std::endl; - assert(false); - return ""; - } - } - - std::string to_ls_image2d_coord(const std::string& x, const std::string& y, const std::string& z, const std::string& b) const - { - std::string coord_x = "(" + x + ") >> 2"; - std::string coord_y = "("; - - if(y != "0" && (_mapper.is_one_component_y() != true)) - { - coord_y += y; - } - if(z != "0" && (_mapper.is_one_component_z() != true)) - { - const std::string dim = _mapper.tensor_component_y(); - coord_y += " + ("; - coord_y += z + ")"; - coord_y += " * "; - coord_y += dim; - } - if(b != "0" && (_mapper.is_one_component_batch() != true)) - { - const std::string dim0 = _mapper.tensor_component_y(); - const std::string dim1 = _mapper.tensor_component_z(); - coord_y += " + ("; - coord_y += b + ")"; - coord_y += " * "; - coord_y += dim0; - coord_y += " * "; - coord_y += dim1; - } - coord_y += ")"; - return "(int2)(" + coord_x + ", " + coord_y + ")"; - } -}; - -/** IGpuLoadStoreHelperWriter factory class */ -class ClLoadStoreHelperWriterFactory final -{ -public: - /** Static method to call the IGpuLoadStoreHelperWriter class accordingly with the tensor storage set in the mapper - * - * - * @return IGpuLoadStoreHelperWriter - */ - static std::unique_ptr create(IGpuKernelWriter *x, const GpuTensor3dMapper& mapper, GpuLoadStoreType type) - { - const auto tensor_storage = mapper.gpu_sampler().storage; - switch(tensor_storage) - { - case GpuSamplerTensorStorage::BufferUint8Ptr: - return std::make_unique(x, mapper, type); - case GpuSamplerTensorStorage::Image2dReadOnly: - case GpuSamplerTensorStorage::Image2dWriteOnly: - return std::make_unique(x, mapper, type); - default: - std::cout << "Unsupported Gpu tensor storage" << std::endl; - assert(false); - return nullptr; - } - } -}; - -// This utility method needs to go in utils.h -inline bool is_tile_scalar(IVectorTile* x) -{ - return x->format().w == 1 && x->format().h == 1; -} - -class ClKernelWriter : public IGpuKernelWriter -{ -public: - ClKernelWriter(GpuKernelWriterAttribute *attr, GpuKernelWriterDataHolder *x) - { - _data = x; - _attr = attr; - } - - ClKernelWriter(const ClKernelWriter &) = default; - ClKernelWriter 
&operator=(const ClKernelWriter &) = default; - - // A IdSpaced ID is a term used to describe a fragment that is registered in ICode to ensure - // there are no conflicts or ambiguity in the code - void set_IdSpace(int32_t id) override - { - _data->tiles.set_IdSpace(id); - _data->arguments.set_IdSpace(id); - } - - void import_tile(const std::string& dst_name, const IVectorTile *src) override - { - _data->tiles.insert(dst_name, src); - } - - void declare_argument(const std::string& name, const TensorInfo& tensor) override - { - assert(_data->arguments[name] == nullptr); - _data->arguments.insert(name, tensor, _attr->return_tensor_component_by_value); - } - - void declare_tile(const std::string& name, const TileInfo& format) override - { - assert(_data->tiles[name] == nullptr); - _data->tiles.insert(name, format); - - IVectorTile *x = _data->tiles[name]; - - for(auto &t : x->underlying_source_variables()) - { - _data->code += t.type.str + " " + t.str + ";\n"; - } - } - - void declare_const_tile(const std::string& name, const std::vector>& in, DataType dt) override - { - assert(_data->tiles[name] == nullptr); - _data->tiles.insert(name, in, dt); - // Note: A constant does not need to be declared in the code - } - - void write_text(const std::string& x) override - { - _data->code += x; - } - - void compound_statement_begin() override - { - _data->tiles.increment_registry_level(); - _data->code += "{\n"; - } - - void compound_statement_end() override - { - _data->tiles.decrement_registry_level(); - _data->code += "}\n"; - } - - void op_get_global_id(const Operand& dst_var, int32_t dim) override - { - assert(dst_var.type() == OperandType::Tile); - assert(_data->tiles.has_tile(dst_var.value())); - assert(_data->tiles[dst_var.value()]->format().w == 1 && - _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable - - auto var = _data->tiles[dst_var.value()]; - - _data->code += var->scalar(0, 0).str; - _data->code += " = get_global_id("; - _data->code += std::to_string(dim); - _data->code += ");\n"; - }; - - void op_get_global_coord(const Operand& o_dst, const Operand& o_step, const TensorOperand& o_tensor, int32_t dim) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - auto dst = operands.unpack(o_dst); - auto step = operands.unpack(o_step); - - // Validation: Check that x, y and z are scalar - - TensorOperandUnpacker tensor_operands(_data->arguments); - auto tensor = tensor_operands.unpack(o_tensor); - auto gpu_sampler = o_tensor.sampler(); - - GpuTensor3dMapper mapper(tensor, gpu_sampler); - - switch (dim) - { - case 0: - if(mapper.is_one_component_x()) - { - _data->code += dst->scalar(0, 0).str; - _data->code += " = 0;\n"; - } - else - { - if(mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin) - { - // Validation: Check: fixed tensor shape - // TO BE CHANGED - _data->code += dst->scalar(0, 0).str; - _data->code += " = get_global_id(0) * "; - _data->code += step->scalar(0, 0).str; - _data->code += ";\n"; - } - else - { - _data->code += dst->scalar(0, 0).str; - _data->code += " = get_global_id(0) * "; - _data->code += step->scalar(0, 0).str; - _data->code += ";\n"; - } - } - break; - case 1: - if(mapper.is_one_component_y()) - { - _data->code += dst->scalar(0, 0).str; - _data->code += " = 0;\n"; - } - else - { - if(mapper.gpu_sampler().address_mode_y == TensorSamplerAddressModeY::OverlappingMin) - { - - } - else - { - _data->code += dst->scalar(0, 0).str; - _data->code += " = get_global_id(1) * "; - _data->code += 
step->scalar(0, 0).str; - _data->code += ";\n"; - } - } - break; - case 2: - if(mapper.is_one_component_z()) - { - _data->code += dst->scalar(0, 0).str; - _data->code += " = 0;\n"; - } - else - { - _data->code += dst->scalar(0, 0).str; - _data->code += " = get_global_id(2) * "; - _data->code += step->scalar(0, 0).str; - _data->code += ";\n"; - } - break; - default: - break; - } - }; - - void op_get_global_batch(const Operand& o_dst, const TensorOperand& o_tensor) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - auto dst = operands.unpack(o_dst); - - TensorOperandUnpacker tensor_operands(_data->arguments); - auto tensor = tensor_operands.unpack(o_tensor); - auto gpu_sampler = o_tensor.sampler(); - - GpuTensor3dMapper mapper(tensor, gpu_sampler); - - if(mapper.is_one_component_batch()) - { - _data->code += dst->scalar(0, 0).str; - _data->code += " = 0;\n"; - } - else - { - std::cout << "Unsupported batched computation" << std::endl; - assert(false); - } - }; - - void op_get_global_size(const Operand& dst_var, int32_t dim) override - { - assert(dst_var.type() == OperandType::Tile); - assert(_data->tiles.has_tile(dst_var.value())); - assert(_data->tiles[dst_var.value()]->format().w == 1 && - _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable - - auto var = _data->tiles[dst_var.value()]; - - _data->code += var->scalar(0, 0).str; - _data->code += " = get_global_size("; - _data->code += std::to_string(dim); - _data->code += ");\n"; - } - - void op_binary_expression(const Operand& dst_name, const Operand& lhs_name, BinaryOp op, const Operand& rhs_name) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - auto lhs = operands.unpack(lhs_name); - auto rhs = operands.unpack(rhs_name); - auto dst = operands.unpack(dst_name); - - const int32_t dst_w = dst->format().w; - const int32_t dst_h = dst->format().h; - assert(lhs != nullptr); - const int32_t lhs_w = lhs->format().w; - const int32_t rhs_w = rhs->format().w; - - if(op == BinaryOp::MatMul_Nt_T) - { - assert((dst->format().dt == DataType::Fp32) || (dst->format().dt == DataType::Fp16)); - for(int32_t y = 0; y < dst_h; ++y) - { - for(int32_t x = 0; x < dst_w; ++x) - { - for(int32_t k = 0; k < lhs_w; ++k) - { - _data->code += dst->scalar(x, y).str; - _data->code += " = fma("; - _data->code += lhs->scalar(k, y).str; - _data->code += ", "; - _data->code += rhs->scalar(k, x).str; - _data->code += ", "; - _data->code += dst->scalar(x, y).str; - _data->code += ");\n"; - } - } - } - - return; - } - - bool broadcast_lhs_x = dst_w != 1 && lhs_w == 1; - bool broadcast_rhs_x = dst_w != 1 && rhs_w == 1; - - std::string lhs_prefix = broadcast_lhs_x? "(" + dst->underlying_source_variables()[0].type.str + ")" : ""; - std::string rhs_prefix = broadcast_rhs_x? 
"(" + dst->underlying_source_variables()[0].type.str + ")" : ""; - std::string op_str = to_string(op); - - // Broadcasting on Y is automatic - for(int32_t y = 0; y < dst_h; ++y) - { - _data->code += dst->vector(y).str; - _data->code += " = "; - _data->code += lhs_prefix + lhs->vector(y).str; - _data->code += " "; - _data->code += op_str; - _data->code += " "; - _data->code += rhs_prefix + rhs->vector(y).str; - _data->code += ";\n"; - } - }; - - void op_cast_expression(const Operand& o_dst, const Operand &o_src, ConvertPolicy policy) override - { - CKW_UNUSED(policy); - - OperandUnpacker operands(_data->tiles, _data->arguments); - auto src = operands.unpack(o_src); - auto dst = operands.unpack(o_dst); - - // const int32_t dst_w = dst->format().w; - const int32_t dst_h = dst->format().h; - const std::string dt = dst->scalar(0, 0).type.str; - - // Broadcasting on Y is automatic - for(int32_t y = 0; y < dst_h; ++y) - { - _data->code += dst->vector(y).str; - _data->code += " = convert_" + dt + "("; - _data->code += src->vector(y).str; - _data->code += ");\n"; - } - }; - - void op_assign(const Operand& dst_name, const Operand& src_name) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - auto src = operands.unpack(src_name); - auto dst = operands.unpack(dst_name); - - const int32_t dst_w = dst->format().w; - const int32_t dst_h = dst->format().h; - const int32_t src_w = src->format().w; - // const int32_t src_h = src->format().h; - const std::string dt = dst->scalar(0, 0).type.str; - - bool broadcast_src_x = dst_w != 1 && src_w == 1; - - std::string src_prefix = broadcast_src_x? "(" + dt + ")" : ""; - - // Broadcasting on Y is automatic - for(int32_t y = 0; y < dst_h; ++y) - { - _data->code += dst->vector(y).str; - _data->code += " = "; - _data->code += src_prefix + src->vector(y).str; - _data->code += ";\n"; - } - } - - void op_scalar_function(const Operand& dst_name, const Operand& src_name, ScalarUnaryFunction func) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - auto src = operands.unpack(src_name); - auto dst = operands.unpack(dst_name); - - const int32_t dst_w = dst->format().w; - const int32_t dst_h = dst->format().h; - const int32_t src_w = src->format().w; - // const int32_t src_h = src->format().h; - const std::string dt = dst->scalar(0, 0).type.str; - - bool broadcast_src_x = dst_w != 1 && src_w == 1; - - std::string src_prefix = broadcast_src_x? 
"(" + dt + ")" : ""; - - // Broadcasting on Y is automatic - for(int32_t y = 0; y < dst_h; ++y) - { - _data->code += dst->vector(y).str; - _data->code += " = "; - - switch(func) - { - case ScalarUnaryFunction::Exp: - _data->code += "exp("; - break; - - default: - CKW_ASSERT(false); - } - - _data->code += src_prefix + src->vector(y).str; - _data->code += ");\n"; - } - } - - void op_if(const Operand& o_lhs, BinaryOp op, const Operand& o_rhs) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - auto lhs = operands.unpack(o_lhs); - auto rhs = operands.unpack(o_rhs); - - assert(is_tile_scalar(lhs)); - assert(is_tile_scalar(rhs)); - - _data->code += "if("; - _data->code += lhs->scalar(0, 0).str; - _data->code += " "; - _data->code += to_string(op); - _data->code += " "; - _data->code += rhs->scalar(0, 0).str; - _data->code += ")\n"; - } - - void op_for_loop(const Operand& var_name, BinaryOp cond_op, const Operand& cond_value_name, AssignmentOp update_op, const Operand& update_value_name) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - auto var = operands.unpack(var_name); - auto cond_value = operands.unpack(cond_value_name); - auto update_value = operands.unpack(update_value_name); - - const int32_t dst_w = var->format().w; - const int32_t dst_h = var->format().h; - - // It must be a scalar variable - CKW_UNUSED(dst_w, dst_h); - assert(dst_w == 1); - assert(dst_h == 1); - - _data->code += "for(; " ; - _data->code += var->scalar(0, 0).str; - _data->code += " "; - _data->code += to_string(cond_op); - _data->code += " " + cond_value->scalar(0, 0).str + "; "; - _data->code += var->scalar(0, 0).str; - _data->code += " "; - _data->code += to_string(update_op); - _data->code += " " + update_value->scalar(0, 0).str + ")"; - _data->code += "\n"; - } - - void op_load_immediate(const TensorOperand& o_tensor, const Operand& o_dst, const Operand& o_x, const Operand& o_y, const Operand& o_z, const Operand& o_batch_idx, const Operand& dilation_y) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - auto dst = operands.unpack(o_dst); - auto x = operands.unpack(o_x); - auto y = operands.unpack(o_y); - auto z = operands.unpack(o_z); - auto dil_y = operands.unpack(dilation_y); - auto b = operands.unpack(o_batch_idx); - - TensorOperandUnpacker tensor_operands(_data->arguments); - auto tensor = tensor_operands.unpack(o_tensor); - auto gpu_sampler = o_tensor.sampler(); - - GpuTensor3dMapper mapper(tensor, gpu_sampler); - - auto load_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Load); - - // Initialize the constant part - load_writer->initialize(dst, x, z, b); - - for(int i = 0; i < dst->format().h; ++i) - { - std::string coord_y = y->scalar(0, 0).str + " + " + std::to_string(i); - if(dil_y->scalar(0, 0).str != "1") - { - coord_y += " * " + dil_y->scalar(0, 0).str; - } - load_writer->write(std::make_pair(i, coord_y)); - } - - load_writer->finalize(); - } - - void op_load_indirect(const TensorOperand& o_tensor, const Operand& o_dst, const Operand& o_x, const Operand& o_indirect_h, const Operand& o_z, const Operand& o_batch_idx) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - auto dst = operands.unpack(o_dst); - auto x = operands.unpack(o_x); - auto y_ind = operands.unpack(o_indirect_h); - auto z = operands.unpack(o_z); - auto b = operands.unpack(o_batch_idx); - - TensorOperandUnpacker tensor_operands(_data->arguments); - auto tensor = tensor_operands.unpack(o_tensor); - auto gpu_sampler = 
o_tensor.sampler(); - - GpuTensor3dMapper mapper(tensor, gpu_sampler); - - auto load_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Load); - - // Initialize the constant part - load_writer->initialize(dst, x, z, b); - - for(int i = 0; i < dst->format().h; ++i) - { - load_writer->write(std::make_pair(i, y_ind->scalar(0, i).str)); - } - - load_writer->finalize(); - } - - void op_store_immediate(const TensorOperand& tensor_name, const Operand& src_name, const Operand& x_name, const Operand& y_name, const Operand& z_name, const Operand& batch_index_name) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - auto src = operands.unpack(src_name); - auto x = operands.unpack(x_name); - auto y = operands.unpack(y_name); - auto z = operands.unpack(z_name); - auto b = operands.unpack(batch_index_name); - - TensorOperandUnpacker tensor_operands(_data->arguments); - auto tensor = tensor_operands.unpack(tensor_name); - auto gpu_sampler = tensor_name.sampler(); - - GpuTensor3dMapper mapper(tensor, gpu_sampler); - - auto store_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Store); - - // Initialize the constant part - store_writer->initialize(src, x, z, b); - - int32_t tile_h = src->format().h; - - for(int m0 = tile_h - 1; m0 >= 0; m0--) - { - store_writer->write(std::make_pair(m0, y->scalar(0, 0).str + " + " + std::to_string(m0))); - } - - store_writer->finalize(); - } - - void op_return() override - { - _data->code += "return;\n"; - } - - void util_get_indirect_buffer(const Operand& o_dst, const TensorOperand& o_tensor, const Operand& o_x, const Operand& o_y, const Operand& o_x_off, const Operand& o_y_off) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - auto dst = operands.unpack(o_dst); - auto x = operands.unpack(o_x); - auto y = operands.unpack(o_y); - auto x_off = operands.unpack(o_x_off); - auto y_off = operands.unpack(o_y_off); - - TensorOperandUnpacker tensor_operands(_data->arguments); - auto tensor = tensor_operands.unpack(o_tensor); - - assert(dst->format().w == 1); - assert(x->format().w == 1); - assert(y->format().w == 1); - assert(x_off->format().w == 1); - assert(y_off->format().w == 1); - assert(dst->format().dt == DataType::Int32); - assert(x->format().dt == DataType::Int32); - assert(y->format().dt == DataType::Int32); - assert(x_off->format().dt == DataType::Int32); - assert(y_off->format().dt == DataType::Int32); - - const std::string width = tensor->component(TensorComponent::W); - const std::string height = tensor->component(TensorComponent::H); - const std::string wxh = tensor->component(TensorComponent::WxH); - /* - int x_s; - int y_s; - x_s = (xi_0 + x_k); - y_s = (yi_0 + y_k); - mi_0 = x_s + y_s * width + b * widthxheight; - mi_0 = select(-1, mi_0, x_s >= 0); - mi_0 = select(-1, mi_0, y_s >= 0); - mi_0 = select(-1, mi_0, x_s < 128); - mi_0 = select(-1, mi_0, y_s < 128); - */ - compound_statement_begin(); - declare_tile("_x_s", TileInfo(DataType::Int32)); - declare_tile("_y_s", TileInfo(DataType::Int32)); - auto x_s = operands.unpack(Operand("_x_s")); - auto y_s = operands.unpack(Operand("_y_s")); - for(int i = 0; i < dst->format().h; ++i) - { - // x_s = (xi_0 + x_k); - // y_s = (yi_0 + y_k); - _data->code += x_s->scalar(0, i).str; - _data->code += " = ("; - _data->code += x->scalar(0, i).str; - _data->code += " + "; - _data->code += x_off->scalar(0, i).str; - _data->code += ");\n"; - _data->code += y_s->scalar(0, i).str; - _data->code += " = ("; - _data->code += 
y->scalar(0, i).str; - _data->code += " + "; - _data->code += y_off->scalar(0, i).str; - _data->code += ");\n"; - // mi_0 = x_s + y_s * width; - _data->code += dst->scalar(0, i).str; - _data->code += " = "; - _data->code += x_s->scalar(0, i).str; - _data->code += " + "; - _data->code += y_s->scalar(0, i).str; - _data->code += " * " + width + ";\n"; - // mi_0 = select(wxh, mi_0, x_s >= 0); - _data->code += dst->scalar(0, i).str; - _data->code += " = select(-1, "; - _data->code += dst->scalar(0, i).str; - _data->code += ", "; - _data->code += x_s->scalar(0, i).str; - _data->code += " >= 0);\n"; - // mi_0 = select(wxh, mi_0, y_s >= 0); - _data->code += dst->scalar(0, i).str; - _data->code += " = select(-1, "; - _data->code += dst->scalar(0, i).str; - _data->code += ", "; - _data->code += y_s->scalar(0, i).str; - _data->code += " >= 0);\n"; - // mi_0 = select(wxh, mi_0, x_s < width); - _data->code += dst->scalar(0, i).str; - _data->code += " = select(-1, "; - _data->code += dst->scalar(0, i).str; - _data->code += ", "; - _data->code += x_s->scalar(0, i).str; - _data->code += " < "; - _data->code += width + ");\n"; - // mi_0 = select(wxh, mi_0, y_s < height); - _data->code += dst->scalar(0, i).str; - _data->code += " = select(-1, "; - _data->code += dst->scalar(0, i).str; - _data->code += ", "; - _data->code += y_s->scalar(0, i).str; - _data->code += " < "; - _data->code += height + ");\n"; - } - compound_statement_end(); - } - -private: - GpuKernelWriterDataHolder* _data { nullptr }; - GpuKernelWriterAttribute * _attr { nullptr }; -}; - -/** IGpuKernelWriter factory class */ -class GpuKernelWriterFactory final -{ -public: - /** Static method to call the IGpuKernelWriter class accordingly with the Gpu programming language - * - * @param[in] gpu GPU target - * - * @return IGpuKernelWriter - */ - static std::unique_ptr create(GpuKernelWriterAttribute *attr, GpuKernelWriterDataHolder *x) - { - switch(x->programming_language()) - { - case GpuTargetLanguage::OpenCL: - return std::make_unique(attr, x); - default: - std::cout << "Unsupported Gpu programming language" << std::endl; - assert(false); - return nullptr; - } - } -}; - -inline int32_t adjust_step(TensorSamplerFormat tensor_format, int32_t step, const TensorInfo *tensor_info_id, int32_t idx) -{ - auto tensor = tensor_info_id->shape; - - int32_t dim[3] = {0}; - - switch(tensor_format) - { - case TensorSamplerFormat::C_W_H: - dim[0] = tensor[0]; - dim[1] = tensor[1]; - dim[2] = tensor[2]; - break; - case TensorSamplerFormat::C_WH_1: - dim[0] = tensor[0]; - dim[1] = tensor[1] * tensor[2]; - dim[2] = 1; - break; - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - break; - } - - return std::min(step, dim[idx]); -} - -} // namespace prototype -} // namespace ckw - -#endif // CKW_SRC_PROTOTYPE_H diff --git a/compute_kernel_writer/src/TensorOperand.cpp b/compute_kernel_writer/src/TensorOperand.cpp deleted file mode 100644 index 00ecc3824e..0000000000 --- a/compute_kernel_writer/src/TensorOperand.cpp +++ /dev/null @@ -1,247 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "ckw/TensorOperand.h" -#include "ckw/Error.h" -#include "ckw/Kernel.h" -#include "ckw/TileOperand.h" -#include "src/Prototype.h" - -namespace ckw -{ - -namespace -{ - -inline TensorComponentOperand &get_or_create_component(std::unique_ptr &ptr, const ::std::string &name, TensorComponent component) -{ - if(ptr == nullptr) - { - ptr = std::make_unique(name, component); - } - - return *ptr; -} - -} // namespace - -// ================================================================================================= -// TensorOperand -// ================================================================================================= - -TensorOperand::TensorOperand(const std::string &name, const TensorInfo &info) - : OperandBase(name), _info(info) -{ -} - -prototype::Operand TensorOperand::create_impl_operand(prototype::IGpuKernelWriter *writer) const -{ - CKW_UNUSED(writer); - return { name() }; -} - -const TensorInfo &TensorOperand::info() const -{ - return _info; -} - -TensorInfo &TensorOperand::info() -{ - return _info; -} - -DataType TensorOperand::data_type() const -{ - return _info.data_type(); -} - -bool TensorOperand::is_constant() const -{ - return false; -} - -const TileOperand &TensorOperand::tile() const -{ - return *_tile; -} - -TileOperand &TensorOperand::tile() -{ - return *_tile; -} - -TensorOperand &TensorOperand::tile(TileOperand &tile) -{ - _tile = &tile; - return *this; -} - -const TensorTileSampler &TensorOperand::tile_sampler() const -{ - return _tile_sampler; -} - -TensorTileSampler &TensorOperand::tile_sampler() -{ - return _tile_sampler; -} - -TensorOperand &TensorOperand::tile_sampler(const TensorTileSampler &value) -{ - _tile_sampler = value; - return *this; -} - -TileOperand &TensorOperand::stride1() -{ - return get_or_create_component(_stride1, name(), TensorComponent::Stride1); -} - -TileOperand &TensorOperand::stride2() -{ - return get_or_create_component(_stride2, name(), TensorComponent::Stride2); -} - -TileOperand &TensorOperand::stride3() -{ - return get_or_create_component(_stride3, name(), TensorComponent::Stride3); -} - -TileOperand &TensorOperand::stride4() -{ - return get_or_create_component(_stride4, name(), TensorComponent::Stride4); -} - -TileOperand &TensorOperand::dim0() -{ - return get_or_create_component(_dim0, name(), TensorComponent::Dim0); -} - -TileOperand &TensorOperand::dim1() -{ - return 
get_or_create_component(_dim1, name(), TensorComponent::Dim1); -} - -TileOperand &TensorOperand::dim2() -{ - return get_or_create_component(_dim2, name(), TensorComponent::Dim2); -} - -TileOperand &TensorOperand::dim3() -{ - return get_or_create_component(_dim3, name(), TensorComponent::Dim3); -} - -TileOperand &TensorOperand::dim4() -{ - return get_or_create_component(_dim4, name(), TensorComponent::Dim4); -} - -TileOperand &TensorOperand::dim1_dim2() -{ - return get_or_create_component(_dim1_dim2, name(), TensorComponent::Dim1xDim2); -} - -TileOperand &TensorOperand::dim1_dim2_dim3() -{ - return get_or_create_component(_dim1_dim2_dim3, name(), TensorComponent::Dim1xDim2xDim3); -} - -TileOperand &TensorOperand::offset_first_element_in_bytes() -{ - return get_or_create_component(_offset_first_element_in_bytes, name(), TensorComponent::OffsetFirstElement); -} - -// ================================================================================================= -// TensorComponentOperand -// ================================================================================================= - -TensorComponentOperand::TensorComponentOperand(const ::std::string &name, TensorComponent component) - : TileOperand(name, DataType::Int32), _component(component) -{ -} - -prototype::Operand TensorComponentOperand::create_impl_operand(prototype::IGpuKernelWriter *writer) const -{ - CKW_UNUSED(writer); - prototype::OperandType type{ prototype::OperandType::Unknown }; - - switch(_component) - { - case TensorComponent::OffsetFirstElement: - type = prototype::OperandType::TensorDataOffset; - break; - - case TensorComponent::Stride1: - type = prototype::OperandType::TensorStride1; - break; - - case TensorComponent::Stride2: - type = prototype::OperandType::TensorStride2; - break; - - case TensorComponent::Stride3: - type = prototype::OperandType::TensorStride3; - break; - - case TensorComponent::Stride4: - type = prototype::OperandType::TensorStride4; - break; - - case TensorComponent::Dim0: - type = prototype::OperandType::TensorDim0; - break; - - case TensorComponent::Dim1: - type = prototype::OperandType::TensorDim1; - break; - - case TensorComponent::Dim2: - type = prototype::OperandType::TensorDim2; - break; - - case TensorComponent::Dim3: - type = prototype::OperandType::TensorDim3; - break; - - case TensorComponent::Dim4: - type = prototype::OperandType::TensorDim4; - break; - - case TensorComponent::Dim1xDim2: - type = prototype::OperandType::TensorDim1xDim2; - break; - - case TensorComponent::Dim1xDim2xDim3: - type = prototype::OperandType::TensorDim1xDim2xDim3; - break; - - default: - CKW_ASSERT(false); - } - - return prototype::Operand(name(), type); -} - -} // namespace ckw diff --git a/compute_kernel_writer/src/TensorTileSampler.cpp b/compute_kernel_writer/src/TensorTileSampler.cpp deleted file mode 100644 index 143d550dec..0000000000 --- a/compute_kernel_writer/src/TensorTileSampler.cpp +++ /dev/null @@ -1,167 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "ckw/TensorTileSampler.h" -#include "ckw/TileOperand.h" -#include "ckw/Types.h" - -namespace ckw -{ - -TensorTileSampler::TensorTileSampler() -{ -} - -TensorTileSampler::TensorTileSampler( - TileOperand &x, TileOperand &y, TileOperand &z, TileOperand &b, - TensorSamplerFormat format, - TensorSamplerAddressModeX address_mode_x, - TensorSamplerAddressModeY address_mode_y, - TensorSamplerAddressModeZ address_mode_z) - : _x(&x), _y(&y), _z(&z), _b(&b), _height(0), _width(0), _format(format), _address_mode_x(address_mode_x), _address_mode_y(address_mode_y), _address_mode_z(address_mode_z) -{ -} - -TensorTileSampler::TensorTileSampler( - TileOperand &x, TileOperand &y, TileOperand &z, TileOperand &b, - int32_t height, int32_t width, - TensorSamplerFormat format, - TensorSamplerAddressModeX address_mode_x, - TensorSamplerAddressModeY address_mode_y, - TensorSamplerAddressModeZ address_mode_z) - : _x(&x), _y(&y), _z(&z), _b(&b), _height(height), _width(width), _format(format), _address_mode_x(address_mode_x), _address_mode_y(address_mode_y), _address_mode_z(address_mode_z) -{ -} - -const TileOperand &TensorTileSampler::x() const -{ - return *_x; -} - -TensorTileSampler &TensorTileSampler::x(TileOperand &x) -{ - _x = &x; - return *this; -} - -const TileOperand &TensorTileSampler::y() const -{ - return *_y; -} - -TensorTileSampler &TensorTileSampler::y(TileOperand &y) -{ - _y = &y; - return *this; -} - -const TileOperand &TensorTileSampler::z() const -{ - return *_z; -} - -TensorTileSampler &TensorTileSampler::z(TileOperand &z) -{ - _z = &z; - return *this; -} - -const TileOperand &TensorTileSampler::b() const -{ - return *_b; -} - -TensorTileSampler &TensorTileSampler::b(TileOperand &b) -{ - _b = &b; - return *this; -} - -int32_t TensorTileSampler::width() const -{ - return _width; -} - -TensorTileSampler &TensorTileSampler::width(int32_t width) -{ - _width = width; - return *this; -} - -int32_t TensorTileSampler::height() const -{ - return _height; -} - -TensorTileSampler &TensorTileSampler::height(int32_t height) -{ - _height = height; - return *this; -} - -TensorSamplerFormat TensorTileSampler::format() const -{ - return _format; -} - -TensorTileSampler &TensorTileSampler::format(TensorSamplerFormat format) -{ - _format = format; - return *this; -} - -TensorSamplerAddressModeX TensorTileSampler::address_mode_x() const -{ - return _address_mode_x; -} - -TensorTileSampler 
&TensorTileSampler::address_mode_x(TensorSamplerAddressModeX address_mode_x) -{ - _address_mode_x = address_mode_x; - return *this; -} - -TensorSamplerAddressModeY TensorTileSampler::address_mode_y() const -{ - return _address_mode_y; -} - -TensorTileSampler &TensorTileSampler::address_mode_y(TensorSamplerAddressModeY address_mode_y) -{ - _address_mode_y = address_mode_y; - return *this; -} - -TensorSamplerAddressModeZ TensorTileSampler::address_mode_z() const -{ - return _address_mode_z; -} - -TensorTileSampler &TensorTileSampler::address_mode_z(TensorSamplerAddressModeZ address_mode_z) -{ - _address_mode_z = address_mode_z; - return *this; -} - -} // namespace ckw diff --git a/compute_kernel_writer/src/TileOperand.cpp b/compute_kernel_writer/src/TileOperand.cpp deleted file mode 100644 index 091947628d..0000000000 --- a/compute_kernel_writer/src/TileOperand.cpp +++ /dev/null @@ -1,104 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "ckw/TileOperand.h" -#include "ckw/Error.h" -#include "src/Prototype.h" - -namespace ckw -{ - -TileOperand::TileOperand(const std::string &name, const TileInfo &info) - : OperandBase(name), _info(info), _value{ 0 }, _constant(false) -{ -} - -TileOperand::TileOperand(const std::string &name, DataType data_type) - : OperandBase(name), _info(TileInfo{ data_type }), _value(0), _constant(false) -{ -} - -TileOperand::TileOperand(const std::string &name, int32_t value) - : OperandBase(name), _info(TileInfo{ DataType::Int32 }), _value(value), _constant(true) -{ -} - -TileOperand::TileOperand(const std::string &name, float value) - : OperandBase(name), _info(TileInfo{ DataType::Fp32 }), _value(value), _constant(true) -{ -} - -prototype::Operand TileOperand::create_impl_operand(prototype::IGpuKernelWriter *writer) const -{ - CKW_UNUSED(writer); - - if(_constant) - { - switch(_info.data_type()) - { - case DataType::Int32: - return prototype::Operand(std::to_string(_value.get()), prototype::OperandType::ScalarInt32); - - case DataType::Fp32: - return prototype::Operand(std::to_string(_value.get()), prototype::OperandType::ScalarFp32); - - default: - CKW_ASSERT(false); - } - } - else - { - return prototype::Operand(name(), prototype::OperandType::Tile); - } -} - -const TileInfo &TileOperand::tile_info() const -{ - return _info; -} - -DataType TileOperand::data_type() const -{ - return _info.data_type(); -} - -bool TileOperand::is_constant() const -{ - return _constant; -} - -bool TileOperand::is_scalar() const -{ - return _info.width() == 1 && _info.height() == 1; -} - -ScalarValue TileOperand::scalar_value() const -{ - CKW_ASSERT(is_scalar()); - CKW_ASSERT(is_constant()); - - return _value; -} - -} // namespace ckw diff --git a/compute_kernel_writer/src/acl/AclComponentArgument.cpp b/compute_kernel_writer/src/acl/AclComponentArgument.cpp deleted file mode 100644 index 5cb909021e..0000000000 --- a/compute_kernel_writer/src/acl/AclComponentArgument.cpp +++ /dev/null @@ -1,97 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "acl/AclComponentArgument.h" -#include "ckw/Error.h" - -AclComponentArgument::AclComponentArgument() -{ -} - -AclComponentArgument::AclComponentArgument(ckw::TensorOperand &tensor) - : _tensor(&tensor) -{ -} - -AclComponentArgument &AclComponentArgument::init_virtual_tensor(ckw::TileOperand &tile, const ckw::TensorTileSampler &tile_sampler) -{ - CKW_ASSERT(_tile == nullptr); - - _tile = &tile; - _tile_sampler = tile_sampler; - - return *this; -} - -bool AclComponentArgument::has_tensor() const -{ - return _tensor != nullptr; -} - -ckw::TensorOperand &AclComponentArgument::tensor() -{ - CKW_ASSERT(_tensor != nullptr); - - return *_tensor; -} - -const ckw::TensorOperand &AclComponentArgument::tensor() const -{ - CKW_ASSERT(_tensor != nullptr); - - return *_tensor; -} - -bool AclComponentArgument::has_tile() const -{ - return _tile != nullptr; -} - -ckw::TileOperand &AclComponentArgument::tile() -{ - CKW_ASSERT(_tile != nullptr); - - return *_tile; -} - -const ckw::TileOperand &AclComponentArgument::tile() const -{ - CKW_ASSERT(_tile != nullptr); - - return *_tile; -} - -ckw::TensorTileSampler &AclComponentArgument::tile_sampler() -{ - CKW_ASSERT(_tile != nullptr); - - return _tile_sampler; -} - -const ckw::TensorTileSampler &AclComponentArgument::tile_sampler() const -{ - CKW_ASSERT(_tile != nullptr); - - return _tile_sampler; -} diff --git a/compute_kernel_writer/src/acl/AclKernelWriter.cpp b/compute_kernel_writer/src/acl/AclKernelWriter.cpp deleted file mode 100644 index a44e798c61..0000000000 --- a/compute_kernel_writer/src/acl/AclKernelWriter.cpp +++ /dev/null @@ -1,50 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "acl/AclKernelWriter.h" -#include "acl/AclComponentArgument.h" -#include "ckw/Error.h" -#include "ckw/TileInfo.h" - -AclKernelWriter::AclKernelWriter(ckw::Kernel &kernel) - : KernelWriter(kernel) -{ -} - -void AclKernelWriter::op_load_once(AclComponentArgument *tensor_or_tile, const ckw::TensorTileSampler &sampler) -{ - if(!tensor_or_tile->has_tile()) - { - CKW_ASSERT(tensor_or_tile->has_tensor()); - - auto &tensor = tensor_or_tile->tensor(); - - const auto tile_name = tensor.name() + "_tile"; - auto &tile = declare_tile(tile_name.c_str(), ckw::TileInfo(tensor.data_type(), sampler.height(), sampler.width())); - - op_load(tile, tensor, sampler); - - tensor_or_tile->init_virtual_tensor(tile, sampler); - } -} diff --git a/compute_kernel_writer/src/acl/AclScopedKernelWriter.cpp b/compute_kernel_writer/src/acl/AclScopedKernelWriter.cpp deleted file mode 100644 index 2a73d47592..0000000000 --- a/compute_kernel_writer/src/acl/AclScopedKernelWriter.cpp +++ /dev/null @@ -1,58 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "acl/AclScopedKernelWriter.h" -#include "acl/AclKernelWriter.h" - -AclScopedKernelWriter::AclScopedKernelWriter(AclKernelWriter *writer) - : _writer(writer), _parent_id_space(writer->id_space()) -{ - _writer->next_id_space(); -} - -AclScopedKernelWriter::AclScopedKernelWriter(const AclScopedKernelWriter &other) - : _writer(other._writer), _parent_id_space(other._writer->id_space()) -{ - _writer->next_id_space(); -} - -AclKernelWriter *AclScopedKernelWriter::operator->() -{ - return _writer; -} - -const AclKernelWriter *AclScopedKernelWriter::operator->() const -{ - return _writer; -} - -AclKernelWriter *AclScopedKernelWriter::writer() -{ - return _writer; -} - -const AclKernelWriter *AclScopedKernelWriter::writer() const -{ - return _writer; -} diff --git a/filelist.json b/filelist.json index a8f6ffc19c..2214e47459 100644 --- a/filelist.json +++ b/filelist.json @@ -2338,9 +2338,12 @@ "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp", "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp", "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp", + "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp", "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp", "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp", "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp", + "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.cpp", + "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp", "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp", "src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.cpp" ] diff --git a/scripts/clang_tidy_rules.py b/scripts/clang_tidy_rules.py index 56b33f3922..11ca6c45f5 100755 --- a/scripts/clang_tidy_rules.py +++ b/scripts/clang_tidy_rules.py @@ -28,7 +28,7 @@ import re import sys def get_list_includes(): - return "compute_kernel_writer/include " \ + return "compute_kernel_writer/prototype/include " \ "src/cpu/kernels/assembly " \ "src/core/NEON/kernels/assembly " \ "src/core/NEON/kernels/convolution/winograd " \ diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp new file mode 100644 index 0000000000..4b4c22fa1d --- /dev/null +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp @@ -0,0 +1,108 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h" +#include "ckw/Error.h" + +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ + +GpuCkwComponentArgument::GpuCkwComponentArgument() +{ +} + +GpuCkwComponentArgument::GpuCkwComponentArgument(ckw::TensorOperand &tensor) + : _tensor(&tensor) +{ +} + +GpuCkwComponentArgument &GpuCkwComponentArgument::init_virtual_tensor(ckw::TileOperand &tile, const ckw::TensorTileSampler &tile_sampler) +{ + CKW_ASSERT(_tile == nullptr); + + _tile = &tile; + _tile_sampler = tile_sampler; + + return *this; +} + +bool GpuCkwComponentArgument::has_tensor() const +{ + return _tensor != nullptr; +} + +ckw::TensorOperand &GpuCkwComponentArgument::tensor() +{ + CKW_ASSERT(_tensor != nullptr); + + return *_tensor; +} + +const ckw::TensorOperand &GpuCkwComponentArgument::tensor() const +{ + CKW_ASSERT(_tensor != nullptr); + + return *_tensor; +} + +bool GpuCkwComponentArgument::has_tile() const +{ + return _tile != nullptr; +} + +ckw::TileOperand &GpuCkwComponentArgument::tile() +{ + CKW_ASSERT(_tile != nullptr); + + return *_tile; +} + +const ckw::TileOperand &GpuCkwComponentArgument::tile() const +{ + CKW_ASSERT(_tile != nullptr); + + return *_tile; +} + +ckw::TensorTileSampler &GpuCkwComponentArgument::tile_sampler() +{ + CKW_ASSERT(_tile != nullptr); + + return _tile_sampler; +} + +const ckw::TensorTileSampler &GpuCkwComponentArgument::tile_sampler() const +{ + CKW_ASSERT(_tile != nullptr); + + return _tile_sampler; +} + +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h new file mode 100644 index 0000000000..80f91389a0 --- /dev/null +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h @@ -0,0 +1,122 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWCOMPONENTARGUMENT_H +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWCOMPONENTARGUMENT_H + +#include "ckw/TensorTileSampler.h" + +namespace ckw +{ +class TensorOperand; +class TileOperand; +} // namespace ckw + +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ + +/** The argument of a dynamic fusion component which can be either user tensor or virtual tensor. */ +class GpuCkwComponentArgument +{ +public: + /** Initialize a new instance of @ref GpuCkwComponentArgument class for empty virtual tensor. */ + GpuCkwComponentArgument(); + + /** Initialize a new instance of @ref GpuCkwComponentArgument class for user tensor. + * + * @param[in] tensor The user tensor. + */ + explicit GpuCkwComponentArgument(ckw::TensorOperand &tensor); + + /** Set virtual tensor information (tile, sampler) for the argument. + * + * If the component is a user tensor, it can be treated as virtual tensor as well + * and won't be loaded again using @ref GpuCkwKernelWriter::op_load_once method. + * + * @param[in] tile The tile that has been loaded. + * @param[in] sampler The tensor sampling information that has been used to load the tile. + */ + GpuCkwComponentArgument &init_virtual_tensor(ckw::TileOperand &tile, const ckw::TensorTileSampler &sampler); + + /** Get whether the argument is a user tensor. */ + bool has_tensor() const; + + /** Get the tensor operand. + * + * If the tensor is not available, throw an error. + */ + ckw::TensorOperand &tensor(); + + /** Get the tensor operand. + * + * If the tensor is not available, throw an error. + */ + const ckw::TensorOperand &tensor() const; + + /** Get whether the argument contains a tile. + * + * The argument can be either a user tensor that has been loaded, + * or a virtual tensor (i.e. a tile with tensor sampling information). + */ + bool has_tile() const; + + /** Get the tile operand. + * + * If the tile is not available, throw an error. + */ + ckw::TileOperand &tile(); + + /** Get the tile operand. + * + * If the tile is not available, throw an error. + */ + const ckw::TileOperand &tile() const; + + /** Get the tensor sampling information for the tile. + * + * If the tile is not available, throw an error. + */ + ckw::TensorTileSampler &tile_sampler(); + + /** Get the tensor sampling information for the tile. + * + * If the tile is not available, throw an error. 
+ */ + const ckw::TensorTileSampler &tile_sampler() const; + +private: + ckw::TensorOperand *_tensor{ nullptr }; + ckw::TileOperand *_tile{ nullptr }; + ckw::TensorTileSampler _tile_sampler{}; +}; + +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute + +#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWCOMPONENTARGUMENT_H diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp index 7c8ec8777d..d5c03c60c5 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp @@ -31,8 +31,8 @@ #include "src/common/utils/Log.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h" -#include "acl/AclKernelWriter.h" -#include "acl/AclScopedKernelWriter.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" using namespace ckw; namespace arm_compute @@ -55,10 +55,10 @@ std::string GpuCkwDriver::get_name() std::string GpuCkwDriver::get_code() { ARM_COMPUTE_LOG_PARAMS(std::string("[V1] TODO")); - ckw::Kernel kernel(get_name().c_str(), GpuTargetLanguage::OpenCL); - AclKernelWriter root_writer(kernel); - AclScopedKernelWriter writer(&root_writer); - GpuCkwVariableTable vtable{}; + ckw::Kernel kernel(get_name().c_str(), GpuTargetLanguage::OpenCL); + GpuCkwKernelWriter root_writer(kernel); + GpuCkwScopedKernelWriter writer(&root_writer); + GpuCkwVariableTable vtable{}; // Global Kernel Writer Driver code diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.cpp new file mode 100644 index 0000000000..ca4f121566 --- /dev/null +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.cpp @@ -0,0 +1,61 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ + +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h" +#include "ckw/Error.h" +#include "ckw/TileInfo.h" + +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ + +GpuCkwKernelWriter::GpuCkwKernelWriter(ckw::Kernel &kernel) + : KernelWriter(kernel) +{ +} + +void GpuCkwKernelWriter::op_load_once(GpuCkwComponentArgument *tensor_or_tile, const ckw::TensorTileSampler &sampler) +{ + if(!tensor_or_tile->has_tile()) + { + CKW_ASSERT(tensor_or_tile->has_tensor()); + + auto &tensor = tensor_or_tile->tensor(); + + const auto tile_name = tensor.name() + "_tile"; + auto &tile = declare_tile(tile_name.c_str(), ckw::TileInfo(tensor.data_type(), sampler.height(), sampler.width())); + + op_load(tile, tensor, sampler); + + tensor_or_tile->init_virtual_tensor(tile, sampler); + } +} + +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h new file mode 100644 index 0000000000..b916e6b28b --- /dev/null +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h @@ -0,0 +1,67 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWKERNELWRITER_H +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWKERNELWRITER_H + +#include "ckw/KernelWriter.h" +#include "ckw/TensorTileSampler.h" + +namespace ckw +{ +class Kernel; +} // namespace ckw + +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ + +class GpuCkwComponentArgument; + +/** Extended implementation of kernel writer for dynamic fusion. */ +class GpuCkwKernelWriter : public ckw::KernelWriter +{ +public: + /** Initialize a new instance of @ref GpuCkwKernelWriter class. + * + * @param[in] kernel The kernel to be generated. + */ + explicit GpuCkwKernelWriter(ckw::Kernel &kernel); + + /** Load the user tensor to the tile in the same component argument if it hasn't been loaded. + * + * @param[in] tensor_or_tile The component argument that is either a user tensor or a virtual tensor. + * @param[in] sampler The tensor sampling information to load the tile. 
+ */ + void op_load_once(GpuCkwComponentArgument *tensor_or_tile, const ckw::TensorTileSampler &sampler); +}; + +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute + +#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWKERNELWRITER_H diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp new file mode 100644 index 0000000000..043fda9e6f --- /dev/null +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp @@ -0,0 +1,69 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h" + +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ + +GpuCkwScopedKernelWriter::GpuCkwScopedKernelWriter(GpuCkwKernelWriter *writer) + : _writer(writer), _parent_id_space(writer->id_space()) +{ + _writer->next_id_space(); +} + +GpuCkwScopedKernelWriter::GpuCkwScopedKernelWriter(const GpuCkwScopedKernelWriter &other) + : _writer(other._writer), _parent_id_space(other._writer->id_space()) +{ + _writer->next_id_space(); +} + +GpuCkwKernelWriter *GpuCkwScopedKernelWriter::operator->() +{ + return _writer; +} + +const GpuCkwKernelWriter *GpuCkwScopedKernelWriter::operator->() const +{ + return _writer; +} + +GpuCkwKernelWriter *GpuCkwScopedKernelWriter::writer() +{ + return _writer; +} + +const GpuCkwKernelWriter *GpuCkwScopedKernelWriter::writer() const +{ + return _writer; +} + +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h new file mode 100644 index 0000000000..4d11b5e3e4 --- /dev/null +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h @@ -0,0 +1,73 @@ +/* + * Copyright (c) 2023 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWSCOPEDKERNELWRITER_H +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWSCOPEDKERNELWRITER_H + +#include + +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ + +class GpuCkwKernelWriter; + +/** Helper to automatically manage kernel writer ID space. */ +class GpuCkwScopedKernelWriter +{ +public: + /** Initialize a new instance of @ref GpuCkwScopedKernelWriter class. */ + explicit GpuCkwScopedKernelWriter(GpuCkwKernelWriter *writer); + + /** Create a new scope from the specified scoped kernel writer. */ + GpuCkwScopedKernelWriter(const GpuCkwScopedKernelWriter &other); + + /** Assignment is disallowed. */ + GpuCkwScopedKernelWriter &operator=(const GpuCkwScopedKernelWriter &) = delete; + + /** Access the underlying kernel writer. */ + GpuCkwKernelWriter *operator->(); + + /** Access the underlying kernel writer. */ + const GpuCkwKernelWriter *operator->() const; + + /** Get the kernel writer. */ + GpuCkwKernelWriter *writer(); + + /** Get the kernel writer. 
*/ + const GpuCkwKernelWriter *writer() const; + +private: + GpuCkwKernelWriter *_writer; + int32_t _parent_id_space; +}; + +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute + +#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWSCOPEDKERNELWRITER_H diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp index 85aed282d1..4475586db8 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp @@ -23,8 +23,8 @@ */ #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h" -#include "acl/AclKernelWriter.h" -#include "acl/AclScopedKernelWriter.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" #include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h" #include @@ -35,7 +35,7 @@ namespace experimental { namespace dynamic_fusion { -AclComponentArgument *GpuCkwVariableTable::declare_variable(const GpuKernelComponentGroup &comp_group, AclScopedKernelWriter &writer, const ITensorInfo *tensor, const std::string &alias) +GpuCkwComponentArgument *GpuCkwVariableTable::declare_variable(const GpuKernelComponentGroup &comp_group, GpuCkwScopedKernelWriter &writer, const ITensorInfo *tensor, const std::string &alias) { ARM_COMPUTE_ERROR_ON_MSG(!tensor->has_valid_id(), "Tensor info with valid id expected"); @@ -49,8 +49,8 @@ AclComponentArgument *GpuCkwVariableTable::declare_variable(const GpuKernelCompo if(comp_group.is_intermediate_tensor(tensor)) { // Create a virtual tensor variable - AclComponentArgument var; - auto &&inserted = _vars.emplace(tensor->id(), var); + GpuCkwComponentArgument var; + auto &&inserted = _vars.emplace(tensor->id(), var); return &(inserted.first->second); } else @@ -58,9 +58,9 @@ AclComponentArgument *GpuCkwVariableTable::declare_variable(const GpuKernelCompo // Create a user tensor variable std::stringstream ss; ss << alias << "_t" << abs(tensor->id()); - const auto uniq_name = ss.str(); - AclComponentArgument var{ writer->create_tensor_argument(uniq_name.c_str(), to_ckw(*tensor)) }; - auto &&inserted = _vars.emplace(tensor->id(), var); + const auto uniq_name = ss.str(); + GpuCkwComponentArgument var{ writer->create_tensor_argument(uniq_name.c_str(), to_ckw(*tensor)) }; + auto &&inserted = _vars.emplace(tensor->id(), var); return &(inserted.first->second); } } diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h index 170fda451b..1c9cb083ea 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h @@ -24,13 +24,11 @@ #ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWVARIABLETABLE #define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWVARIABLETABLE -#include "acl/AclComponentArgument.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h" #include "arm_compute/core/ITensorInfo.h" #include -class AclScopedKernelWriter; - namespace arm_compute { namespace experimental @@ -38,6 +36,7 @@ namespace experimental namespace dynamic_fusion { class GpuKernelComponentGroup; +class GpuCkwScopedKernelWriter; /** A table of all the variables used in the 
kernel. * @@ -55,12 +54,12 @@ public: * @param[in] tensor Tensor info with which the new variable is associated * @param[in] alias Alias for the variable. Will be used as part of the variable name * - * @return AclComponentArgument* + * @return GpuCkwComponentArgument* */ - AclComponentArgument *declare_variable(const GpuKernelComponentGroup &comp_group, AclScopedKernelWriter &writer, const ITensorInfo *tensor, const std::string &alias = "unnamed"); + GpuCkwComponentArgument *declare_variable(const GpuKernelComponentGroup &comp_group, GpuCkwScopedKernelWriter &writer, const ITensorInfo *tensor, const std::string &alias = "unnamed"); private: - std::map _vars{}; + std::map _vars{}; }; } // namespace dynamic_fusion diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h index 15402bc330..62255f1cf6 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h @@ -28,8 +28,6 @@ #include "src/dynamic_fusion/sketch/ArgumentPack.h" #include "src/dynamic_fusion/sketch/gpu/components/Types.h" -class AclScopedKernelWriter; - namespace arm_compute { class ITensorInfo; @@ -40,6 +38,7 @@ namespace dynamic_fusion /** Forward declaration */ class GpuKernelComponentGroup; class GpuCkwVariableTable; +class GpuCkwScopedKernelWriter; /** An interface used by @ref GpuCkwDriver to write source code for a kernel component * @@ -90,7 +89,7 @@ public: * * @note @p writer can only be passed via value since the new scope is created in the copy constructor */ - virtual void write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, AclScopedKernelWriter writer) const = 0; + virtual void write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const = 0; /** Get tensor arguments */ ArgumentPack tensors() const { diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp index 9895bbeb77..cba1cfbe40 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp @@ -23,8 +23,8 @@ */ #include "GpuCkwElementwiseBinary.h" -#include "acl/AclKernelWriter.h" -#include "acl/AclScopedKernelWriter.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Validate.h" #include "ckw/TensorTileSampler.h" @@ -40,11 +40,13 @@ namespace arm_compute { namespace experimental { +namespace dynamic_fusion +{ namespace { /** Create a simple sampler from tile of dimension [m0, n0] */ -inline TensorTileSampler create_simple_sampler(AclScopedKernelWriter &writer, int32_t m0, int32_t n0) +inline TensorTileSampler create_simple_sampler(GpuCkwScopedKernelWriter &writer, int32_t m0, int32_t n0) { TensorTileSampler sampler; @@ -75,32 +77,31 @@ inline TensorTileSampler create_simple_sampler(AclScopedKernelWriter &writer, in } } // namespace -namespace dynamic_fusion -{ GpuCkwElementwiseBinary::GpuCkwElementwiseBinary(ComponentId id, const ArgumentPack &tensors, const Attributes &attributes) : IGpuCkwComponentDriver{ id, tensors }, _lhs{}, _rhs{}, - _dst{}, - _attributes{ attributes } + _dst{} { + 
ARM_COMPUTE_UNUSED(attributes); + _lhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); _rhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_1); _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); ARM_COMPUTE_ERROR_ON_NULLPTR(_lhs, _rhs, _dst); } -void GpuCkwElementwiseBinary::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, AclScopedKernelWriter writer) const +void GpuCkwElementwiseBinary::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const { const auto root_window = comp_group.get_root_component()->ckw_component_driver()->get_window(); const unsigned int n0 = root_window.x().step(); const unsigned int m0 = root_window.y().step(); - AclComponentArgument *lhs = vtable.declare_variable(comp_group, writer, _lhs, "lhs"); - AclComponentArgument *rhs = vtable.declare_variable(comp_group, writer, _rhs, "rhs"); - AclComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst"); + GpuCkwComponentArgument *lhs = vtable.declare_variable(comp_group, writer, _lhs, "lhs"); + GpuCkwComponentArgument *rhs = vtable.declare_variable(comp_group, writer, _rhs, "rhs"); + GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst"); // Load the LHS and RHS tiles and prepare the tensor sampler. load_lhs_rhs_tiles_and_prepare_sampler(writer, lhs, rhs, m0, n0, create_simple_sampler); diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h index 1a79754d1d..963b92baf9 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h @@ -53,14 +53,13 @@ public: /** Destructor */ ~GpuCkwElementwiseBinary() override = default; // Inherited methods overriden: - virtual void write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, AclScopedKernelWriter writer) const override; + virtual void write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const override; Window get_window() const override; private: const ITensorInfo *_lhs; const ITensorInfo *_rhs; const ITensorInfo *_dst; - Attributes _attributes; }; } // namespace dynamic_fusion } // namespace experimental diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp index 1a1dfc135a..63555e6064 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp @@ -24,8 +24,8 @@ #include "GpuCkwStore.h" #include "arm_compute/core/Error.h" -#include "compute_kernel_writer/include/acl/AclKernelWriter.h" -#include "compute_kernel_writer/include/acl/AclScopedKernelWriter.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h" #include @@ -41,7 +41,7 @@ GpuCkwStore::GpuCkwStore(ComponentId id, const ArgumentPack &tensor _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); } -void GpuCkwStore::write_component_code(const ComponentGroup &comp_group, 
GpuCkwVariableTable &vtable, AclScopedKernelWriter writer) const +void GpuCkwStore::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const { auto src = vtable.declare_variable(comp_group, writer, _src, "src"); auto dst = vtable.declare_variable(comp_group, writer, _dst, "dst"); diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h index 45cc43fe62..5728ff9f49 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h @@ -48,7 +48,7 @@ public: /** Destructor */ ~GpuCkwStore() override = default; // Inherited methods overriden: - virtual void write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, AclScopedKernelWriter writer) const override; + virtual void write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const override; private: const ITensorInfo *_src; diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h index d94ebd5ce9..ca13329335 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h @@ -24,8 +24,8 @@ #ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_UTILS_WRITERHELPER #define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_UTILS_WRITERHELPER -#include "acl/AclComponentArgument.h" -#include "acl/AclScopedKernelWriter.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" #include "ckw/TensorTileSampler.h" #include @@ -37,11 +37,11 @@ namespace experimental { namespace dynamic_fusion { -using SamplerCreator = std::function; +using SamplerCreator = std::function; /** Load lhs and rhs tiles of dimension [m0, n0] only when not loaded and prepare the sampler */ -inline void load_lhs_rhs_tiles_and_prepare_sampler(AclScopedKernelWriter &writer, AclComponentArgument *lhs, AclComponentArgument *rhs, int32_t m0, int32_t n0, SamplerCreator create_sampler) +inline void load_lhs_rhs_tiles_and_prepare_sampler(GpuCkwScopedKernelWriter &writer, GpuCkwComponentArgument *lhs, GpuCkwComponentArgument *rhs, int32_t m0, int32_t n0, SamplerCreator create_sampler) { if(!lhs->has_tile() && !rhs->has_tile()) { -- cgit v1.2.1
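Illustrative sketch only, not part of the patch: it shows how the renamed ckw_driver classes introduced above are intended to be used together by a component driver, mirroring the GpuCkwStore and GpuCkwElementwiseBinary hunks. The free function, its parameters and the default-constructed sampler are placeholders of my own; only the GpuCkw* names and the member calls visible in this patch come from the source tree, and the snippet builds only inside the Compute Library source tree.

/*
 * Minimal usage sketch for the relocated ckw_driver classes (assumptions noted
 * in the comments; not a definitive implementation).
 */
#include "arm_compute/core/Error.h"
#include "arm_compute/core/ITensorInfo.h"
#include "ckw/TensorTileSampler.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"

namespace arm_compute
{
namespace experimental
{
namespace dynamic_fusion
{
// The scoped writer is taken by value: copying it opens a fresh id space, so the
// names declared by this component cannot clash with those of other components.
void write_example_component_code(const GpuKernelComponentGroup &comp_group,
                                  GpuCkwVariableTable           &vtable,
                                  GpuCkwScopedKernelWriter       writer,
                                  const ITensorInfo             *src_info,
                                  const ITensorInfo             *dst_info)
{
    // Every tensor is registered once in the variable table and comes back as a
    // GpuCkwComponentArgument (either a user tensor or a virtual tensor).
    GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, src_info, "src");
    GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, dst_info, "dst");

    // Load the source tile only if a previous component has not already done so;
    // afterwards the argument carries both the tile and the sampler used to load it.
    ckw::TensorTileSampler sampler; // would be configured like create_simple_sampler() above
    writer->op_load_once(src, sampler);

    // The component would now emit its own op_* instructions on src->tile(),
    // typically producing a tile that is associated with dst.
    ckw::TileOperand &src_tile = src->tile();
    ARM_COMPUTE_UNUSED(src_tile, dst);
}
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute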