diff options
115 files changed, 230 insertions, 15816 deletions
diff --git a/Android.bp b/Android.bp index 2983e2e21d..670138b209 100644 --- a/Android.bp +++ b/Android.bp @@ -172,6 +172,7 @@ cc_library_static { proprietary: true, local_include_dirs: ["build/android-arm64v8a/src/core", "build/android-arm64v8a/src/core/CL", + "compute_kernel_writer/include", "src/core/common", "src/core/helpers", "src/core/NEON/kernels/arm_gemm", @@ -621,7 +622,6 @@ cc_library_static { "src/dynamic_fusion/sketch/attributes/ReshapeAttributes.cpp", "src/dynamic_fusion/sketch/attributes/ResizeAttributes.cpp", "src/dynamic_fusion/sketch/attributes/SoftmaxAttributes.cpp", - "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.cpp", "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp", "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp", "src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp", @@ -634,8 +634,6 @@ cc_library_static { "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp", "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp", "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp", - "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp", - "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp", "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp", "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp", "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp", @@ -657,19 +655,6 @@ cc_library_static { "src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp", "src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp", "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp", - 
"src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp", "src/gpu/cl/ClContext.cpp", "src/gpu/cl/ClKernelLibrary.cpp", "src/gpu/cl/ClQueue.cpp", diff --git a/SConscript b/SConscript index f0c42979ce..a9986feb32 100644 --- a/SConscript +++ b/SConscript @@ -564,12 +564,6 @@ if env['fixed_format_kernels']: # Dynamic fusion if env['experimental_dynamic_fusion']: lib_files += filelist['experimental']['dynamic_fusion']['common'] - lib_files += filelist['experimental']['dynamic_fusion']['template_writer'] - -if "ACL_INTERNAL_TEST_CKW_IN_DF" in env["extra_cxx_flags"]: - if not env["experimental_dynamic_fusion"]: - print("To use ACL_INTERNAL_TEST_CKW_IN_DF experimental_dynamic_fusion must be set to 1") - Exit(1) lib_files += filelist['experimental']['dynamic_fusion']['ckw_driver'] # Logging files diff --git a/SConstruct b/SConstruct index 6f498b51c8..bad85e503d 100644 --- a/SConstruct +++ b/SConstruct @@ -227,9 +227,6 @@ if env['experimental_dynamic_fusion']: # Dynamic Fusion on GPU has a direct dependency on OpenCL and Compute Kernel Writer env['opencl'] = 1 - # Build CKW by default - env["extra_cxx_flags"] += ' -DACL_INTERNAL_TEST_CKW_IN_DF' - if env['opencl'] and env['embed_kernels'] and env['compress_kernels'] and env['os'] not in ['android']: 
print("Compressed kernels are supported only for android builds") Exit(1) diff --git a/compute_kernel_writer/prototype/CMakeLists.txt b/compute_kernel_writer/prototype/CMakeLists.txt deleted file mode 100644 index 439dcd3b7e..0000000000 --- a/compute_kernel_writer/prototype/CMakeLists.txt +++ /dev/null @@ -1,78 +0,0 @@ -# Copyright (c) 2023 Arm Limited. -# -# SPDX-License-Identifier: MIT -# -# Permission is hereby granted, free of charge, to any person obtaining a copy -# of this software and associated documentation files (the "Software"), to -# deal in the Software without restriction, including without limitation the -# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -# sell copies of the Software, and to permit persons to whom the Software is -# furnished to do so, subject to the following conditions: -# -# The above copyright notice and this permission notice shall be included in all -# copies or substantial portions of the Software. -# -# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -# SOFTWARE. 
- -cmake_minimum_required(VERSION 3.14 FATAL_ERROR) - -#--------------------------------------------------------------------- -# Prototype - -add_library(ckw_prototype - src/TileInfo.cpp - src/TensorInfo.cpp - src/Kernel.cpp - src/KernelWriter.cpp - src/OperandBase.cpp - src/TileOperand.cpp - src/TensorOperand.cpp - src/TensorTileSampler.cpp - src/KernelArgument.cpp -) - -target_compile_options(ckw_prototype - PUBLIC - ${CKW_CXX_FLAGS} - "$<$<CXX_COMPILER_ID:GNU>:${GNU_WARNINGS}>" - "$<$<CONFIG:Debug>:${CKW_ASSERTS_OPTS}>" - "$<$<BOOL:${CKW_ENABLE_ASSERTS}>:${CKW_ASSERTS_OPTS}>" - ${CMAKE_CXX_FLAGS} - PRIVATE - $<$<CONFIG:Release>:-Os> -) - -target_compile_definitions(ckw_prototype PUBLIC - $<$<CONFIG:Debug>:COMPUTE_KERNEL_WRITER_DEBUG_ENABLED> - $<$<CONFIG:Debug>:COMPUTE_KERNEL_WRITER_ASSERTS_ENABLED> - $<$<BOOL:${CKW_ENABLE_ASSERTS}>:COMPUTE_KERNEL_WRITER_ASSERTS_ENABLED> - $<$<BOOL:${CKW_ENABLE_OPENCL}>:COMPUTE_KERNEL_WRITER_OPENCL_ENABLED> -) - -target_include_directories(ckw_prototype - PUBLIC ${CMAKE_CURRENT_LIST_DIR}/include - PRIVATE ${CMAKE_CURRENT_LIST_DIR} -) - -#--------------------------------------------------------------------- -# Examples - -add_library(ckw_prototype_examples_common - examples/common/ExampleKernelWriter.cpp - examples/common/ExampleScopedKernelWriter.cpp - examples/common/ExampleComponentArgument.cpp -) - -target_link_libraries(ckw_prototype_examples_common PUBLIC ckw_prototype) - -add_executable(ckw_prototype_examples_add_exp_store examples/add_exp_store.cpp) -target_link_libraries(ckw_prototype_examples_add_exp_store PUBLIC ckw_prototype_examples_common) - -add_executable(writer_helper examples/writer_helper.cpp) -target_link_libraries(writer_helper PUBLIC ckw_prototype) diff --git a/compute_kernel_writer/prototype/examples/add_exp_store.cpp b/compute_kernel_writer/prototype/examples/add_exp_store.cpp deleted file mode 100644 index 2b640ca01b..0000000000 --- a/compute_kernel_writer/prototype/examples/add_exp_store.cpp +++ 
/dev/null @@ -1,206 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "ckw/Error.h" -#include "ckw/KernelArgument.h" -#include "ckw/KernelWriter.h" -#include "ckw/TensorOperand.h" -#include "ckw/TensorTileSampler.h" -#include "ckw/TileOperand.h" - -#include "common/ExampleComponentArgument.h" -#include "common/ExampleKernelWriter.h" -#include "common/ExampleScopedKernelWriter.h" -#include <iostream> -#include <vector> - -using namespace ckw; - -TensorTileSampler create_simple_sampler(ExampleScopedKernelWriter writer) -{ - TensorTileSampler sampler; - - constexpr int32_t m0 = 4; - constexpr int32_t n0 = 4; - - auto &gid_0 = writer->declare_tile("gid_0", DataType::Int32); - auto &gid_1 = writer->declare_tile("gid_1", DataType::Int32); - auto &gid_2 = writer->declare_tile("gid_2", DataType::Int32); - - auto &const_0 = writer->declare_tile("0", 0); - - writer->op_get_global_id(gid_0, 0); - writer->op_get_global_id(gid_1, 1); - writer->op_get_global_id(gid_2, 2); - - sampler.x(gid_0); - sampler.y(gid_1); - sampler.z(const_0); - sampler.b(gid_2); - - sampler.width(n0); - sampler.height(m0); - - sampler.format(TensorSamplerFormat::C_WH_1); - sampler.address_mode_x(TensorSamplerAddressModeX::None); - sampler.address_mode_y(TensorSamplerAddressModeY::ClampToBorder); - sampler.address_mode_z(TensorSamplerAddressModeZ::Skip); - - return sampler; -} - -void op_binary_elementwise(ExampleScopedKernelWriter writer, std::vector<ExampleComponentArgument *> operands) -{ - auto lhs = operands.at(0); - auto rhs = operands.at(1); - auto dst = operands.at(2); - - // Load the LHS and RHS tile and prepare the tensor sampler. 
- if (!lhs->has_tile() && !rhs->has_tile()) - { - const auto sampler = create_simple_sampler(writer); - - writer->op_load_once(lhs, sampler); - writer->op_load_once(rhs, sampler); - } - else if (lhs->has_tile()) - { - const auto &sampler = lhs->tile_sampler(); - writer->op_load_once(rhs, sampler); - } - else - { - const auto &sampler = rhs->tile_sampler(); - writer->op_load_once(lhs, sampler); - } - - auto &lhs_tile = lhs->tile(); - auto &rhs_tile = rhs->tile(); - const auto &sampler = lhs->tile_sampler(); - - // Prepare the output tile. - if (!dst->has_tile()) - { - auto &tile = writer->declare_tile("dst_tile", lhs_tile.tile_info()); - dst->init_virtual_tensor(tile, sampler); - } - - auto &dst_tile = dst->tile(); - - // Perform the operation. - writer->op_binary_expression(dst_tile, lhs_tile, BinaryOp::Add, rhs_tile); -} - -void op_exp(ExampleScopedKernelWriter writer, std::vector<ExampleComponentArgument *> operands) -{ - auto src = operands.at(0); - auto dst = operands.at(1); - - // Load the source tile and prepare the sampler. - if (!src->has_tile()) - { - const auto sampler = create_simple_sampler(writer); - writer->op_load_once(src, sampler); - } - - auto &src_tile = src->tile(); - const auto &sampler = src->tile_sampler(); - - // Prepare the output tile. - if (!dst->has_tile()) - { - auto &tile = writer->declare_tile("dst_tile", src_tile.tile_info()); - dst->init_virtual_tensor(tile, sampler); - } - - auto &dst_tile = dst->tile(); - - // Perform the operation. 
- writer->op_unary_elementwise_function(dst_tile, UnaryFunction::Exp, src_tile); -} - -void op_store(ExampleScopedKernelWriter writer, std::vector<ExampleComponentArgument *> operands) -{ - auto src = operands.at(0); - auto dst = operands.at(1); - - auto &src_tile = src->tile(); - const auto &sampler = src->tile_sampler(); - auto &dst_tensor = dst->tensor(); - - writer->op_store(dst_tensor, src_tile, sampler); -} - -int main() -{ - Kernel kernel("example", GpuTargetLanguage::OpenCL); - ExampleKernelWriter root_writer(kernel); - - ExampleScopedKernelWriter writer(&root_writer); - - const TensorInfo src0_info(DataType::Fp32, TensorShape({3, 10, 20, 1, 1}), TensorDataLayout::Nhwc, 0); - const TensorInfo src1_info(DataType::Fp32, TensorShape({3, 10, 20, 1, 1}), TensorDataLayout::Nhwc, 1); - const TensorInfo dst_info(DataType::Fp32, TensorShape({3, 10, 20, 1, 1}), TensorDataLayout::Nhwc, 2); - - ExampleComponentArgument src0( - writer->declare_tensor_argument("src0", src0_info, TensorStorageType::BufferUint8Ptr)); - ExampleComponentArgument src1( - writer->declare_tensor_argument("src1", src1_info, TensorStorageType::BufferUint8Ptr)); - ExampleComponentArgument dst(writer->declare_tensor_argument("dst", dst_info, TensorStorageType::BufferUint8Ptr)); - - ExampleComponentArgument ans; - - op_binary_elementwise(writer, {&src0, &src1, &ans}); - op_exp(writer, {&ans, &ans}); - op_store(writer, {&ans, &dst}); - - const auto arguments = kernel.arguments(); - - std::cout << "\n====================\nArguments:\n====================\n"; - - for (auto &arg : arguments) - { - switch (arg.type()) - { - case ckw::KernelArgument::Type::TensorStorage: - std::cout << "* Tensor storage: ID = " << arg.id() << ", type = " << std::hex << "0x" - << static_cast<uint32_t>(arg.tensor_storage_type()) << std::dec << "\n"; - break; - - case ckw::KernelArgument::Type::TensorComponent: - std::cout << "* Tensor component: ID = " << arg.id() << ", type = " << std::hex << "0x" - << 
static_cast<uint32_t>(arg.tensor_component_type()) << std::dec << "\n"; - break; - - default: - CKW_ASSERT(false); - } - } - - std::cout << "\n====================\nCode:\n====================\n"; - const auto code = root_writer.generate_code(); - std::cout << code; - - return 0; -} diff --git a/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.cpp b/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.cpp deleted file mode 100644 index 55223dae0e..0000000000 --- a/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.cpp +++ /dev/null @@ -1,98 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "ExampleComponentArgument.h" - -#include "ckw/Error.h" - -ExampleComponentArgument::ExampleComponentArgument() -{ -} - -ExampleComponentArgument::ExampleComponentArgument(ckw::TensorOperand &tensor) : _tensor(&tensor) -{ -} - -ExampleComponentArgument &ExampleComponentArgument::init_virtual_tensor(ckw::TileOperand &tile, - const ckw::TensorTileSampler &tile_sampler) -{ - CKW_ASSERT(_tile == nullptr); - - _tile = &tile; - _tile_sampler = tile_sampler; - - return *this; -} - -bool ExampleComponentArgument::has_tensor() const -{ - return _tensor != nullptr; -} - -ckw::TensorOperand &ExampleComponentArgument::tensor() -{ - CKW_ASSERT(_tensor != nullptr); - - return *_tensor; -} - -const ckw::TensorOperand &ExampleComponentArgument::tensor() const -{ - CKW_ASSERT(_tensor != nullptr); - - return *_tensor; -} - -bool ExampleComponentArgument::has_tile() const -{ - return _tile != nullptr; -} - -ckw::TileOperand &ExampleComponentArgument::tile() -{ - CKW_ASSERT(_tile != nullptr); - - return *_tile; -} - -const ckw::TileOperand &ExampleComponentArgument::tile() const -{ - CKW_ASSERT(_tile != nullptr); - - return *_tile; -} - -ckw::TensorTileSampler &ExampleComponentArgument::tile_sampler() -{ - CKW_ASSERT(_tile != nullptr); - - return _tile_sampler; -} - -const ckw::TensorTileSampler &ExampleComponentArgument::tile_sampler() const -{ - CKW_ASSERT(_tile != nullptr); - - return _tile_sampler; -} diff --git a/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.h b/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.h deleted file mode 100644 index 0e029b1157..0000000000 --- a/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.h +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef CKW_PROTOTYPE_EXAMPLES_COMMON_EXAMPLECOMPONENTARGUMENT_H -#define CKW_PROTOTYPE_EXAMPLES_COMMON_EXAMPLECOMPONENTARGUMENT_H - -#include "ckw/TensorTileSampler.h" - -namespace ckw -{ -class TensorOperand; - -class TileOperand; -} // namespace ckw - -/** The argument of a dynamic fusion component which can be either user tensor or virtual tensor. */ -class ExampleComponentArgument -{ -public: - /** Initialize a new instance of @ref ExampleComponentArgument class for empty virtual tensor. */ - ExampleComponentArgument(); - - /** Initialize a new instance of @ref ExampleComponentArgument class for user tensor. - * - * @param[in] tensor The user tensor. - */ - explicit ExampleComponentArgument(ckw::TensorOperand &tensor); - - /** Set virtual tensor information (tile, sampler) for the argument. 
- * - * If the component is a user tensor, it can be treated as virtual tensor as well - * and won't be loaded again using @ref ExampleKernelWriter::op_load_once method. - * - * @param[in] tile The tile that has been loaded. - * @param[in] sampler The tensor sampling information that has been used to load the tile. - */ - ExampleComponentArgument &init_virtual_tensor(ckw::TileOperand &tile, const ckw::TensorTileSampler &sampler); - - /** Get whether the argument is a user tensor. */ - bool has_tensor() const; - - /** Get the tensor operand. - * - * If the tensor is not available, throw an error. - */ - ckw::TensorOperand &tensor(); - - /** Get the tensor operand. - * - * If the tensor is not available, throw an error. - */ - const ckw::TensorOperand &tensor() const; - - /** Get whether the argument contains a tile. - * - * The argument can be either a user tensor that has been loaded, - * or a virtual tensor (i.e. a tile with tensor sampling information). - */ - bool has_tile() const; - - /** Get the tile operand. - * - * If the tile is not available, throw an error. - */ - ckw::TileOperand &tile(); - - /** Get the tile operand. - * - * If the tile is not available, throw an error. - */ - const ckw::TileOperand &tile() const; - - /** Get the tensor sampling information for the tile. - * - * If the tile is not available, throw an error. - */ - ckw::TensorTileSampler &tile_sampler(); - - /** Get the tensor sampling information for the tile. - * - * If the tile is not available, throw an error. 
- */ - const ckw::TensorTileSampler &tile_sampler() const; - -private: - ckw::TensorOperand *_tensor{nullptr}; - ckw::TileOperand *_tile{nullptr}; - ckw::TensorTileSampler _tile_sampler{}; -}; - -#endif // CKW_PROTOTYPE_EXAMPLES_COMMON_EXAMPLECOMPONENTARGUMENT_H diff --git a/compute_kernel_writer/prototype/examples/common/ExampleKernelWriter.cpp b/compute_kernel_writer/prototype/examples/common/ExampleKernelWriter.cpp deleted file mode 100644 index 1734ce8823..0000000000 --- a/compute_kernel_writer/prototype/examples/common/ExampleKernelWriter.cpp +++ /dev/null @@ -1,52 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "ExampleKernelWriter.h" - -#include "ckw/Error.h" -#include "ckw/TileInfo.h" - -#include "ExampleComponentArgument.h" - -ExampleKernelWriter::ExampleKernelWriter(ckw::Kernel &kernel) : KernelWriter(kernel) -{ -} - -void ExampleKernelWriter::op_load_once(ExampleComponentArgument *tensor_or_tile, const ckw::TensorTileSampler &sampler) -{ - if (!tensor_or_tile->has_tile()) - { - CKW_ASSERT(tensor_or_tile->has_tensor()); - - auto &tensor = tensor_or_tile->tensor(); - - const auto tile_name = tensor.name() + "_tile"; - auto &tile = - declare_tile(tile_name.c_str(), ckw::TileInfo(tensor.data_type(), sampler.height(), sampler.width())); - - op_load(tile, tensor, sampler); - - tensor_or_tile->init_virtual_tensor(tile, sampler); - } -} diff --git a/compute_kernel_writer/prototype/examples/common/ExampleKernelWriter.h b/compute_kernel_writer/prototype/examples/common/ExampleKernelWriter.h deleted file mode 100644 index 1528c3d933..0000000000 --- a/compute_kernel_writer/prototype/examples/common/ExampleKernelWriter.h +++ /dev/null @@ -1,56 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef CKW_PROTOTYPE_EXAMPLES_COMMON_EXAMPLEKERNELWRITER_H -#define CKW_PROTOTYPE_EXAMPLES_COMMON_EXAMPLEKERNELWRITER_H - -#include "ckw/KernelWriter.h" -#include "ckw/TensorTileSampler.h" - -class ExampleComponentArgument; - -namespace ckw -{ -class Kernel; -} // namespace ckw - -/** Extended implementation of kernel writer for dynamic fusion. */ -class ExampleKernelWriter : public ckw::KernelWriter -{ -public: - /** Initialize a new instance of @ref ExampleKernelWriter class. - * - * @param[in] kernel The kernel to be generated. - */ - explicit ExampleKernelWriter(ckw::Kernel &kernel); - - /** Load the user tensor to the tile in the same component argument if it hasn't been loaded. - * - * @param[in] tensor_or_tile The component argument that is either a user tensor or a virtual tensor. - * @param[in] sampler The tensor sampling information to load the tile. - */ - void op_load_once(ExampleComponentArgument *tensor_or_tile, const ckw::TensorTileSampler &sampler); -}; - -#endif // CKW_PROTOTYPE_EXAMPLES_COMMON_EXAMPLEKERNELWRITER_H diff --git a/compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.cpp b/compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.cpp deleted file mode 100644 index 784d5ffb96..0000000000 --- a/compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.cpp +++ /dev/null @@ -1,59 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "ExampleScopedKernelWriter.h" - -#include "ExampleKernelWriter.h" - -ExampleScopedKernelWriter::ExampleScopedKernelWriter(ExampleKernelWriter *writer) - : _writer(writer), _parent_id_space(writer->id_space()) -{ - _writer->next_id_space(); -} - -ExampleScopedKernelWriter::ExampleScopedKernelWriter(const ExampleScopedKernelWriter &other) - : _writer(other._writer), _parent_id_space(other._writer->id_space()) -{ - _writer->next_id_space(); -} - -ExampleKernelWriter *ExampleScopedKernelWriter::operator->() -{ - return _writer; -} - -const ExampleKernelWriter *ExampleScopedKernelWriter::operator->() const -{ - return _writer; -} - -ExampleKernelWriter *ExampleScopedKernelWriter::writer() -{ - return _writer; -} - -const ExampleKernelWriter *ExampleScopedKernelWriter::writer() const -{ - return _writer; -} diff --git a/compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.h b/compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.h deleted file mode 100644 index 4655b1897e..0000000000 --- a/compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.h +++ /dev/null @@ -1,62 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef CKW_PROTOTYPE_EXAMPLES_COMMON_EXAMPLESCOPEDKERNELWRITER_H -#define CKW_PROTOTYPE_EXAMPLES_COMMON_EXAMPLESCOPEDKERNELWRITER_H - -#include <cstdint> - -class ExampleKernelWriter; - -/** Helper to automatically manage kernel writer ID space. */ -class ExampleScopedKernelWriter -{ -public: - /** Initialize a new instance of @ref ExampleScopedKernelWriter class. */ - explicit ExampleScopedKernelWriter(ExampleKernelWriter *writer); - - /** Create a new scope from the specified scoped kernel writer. */ - ExampleScopedKernelWriter(const ExampleScopedKernelWriter &other); - - /** Assignment is disallowed. */ - ExampleScopedKernelWriter &operator=(const ExampleScopedKernelWriter &) = delete; - - /** Access the underlying kernel writer. */ - ExampleKernelWriter *operator->(); - - /** Access the underlying kernel writer. */ - const ExampleKernelWriter *operator->() const; - - /** Get the kernel writer. */ - ExampleKernelWriter *writer(); - - /** Get the kernel writer. */ - const ExampleKernelWriter *writer() const; - -private: - ExampleKernelWriter *_writer; - int32_t _parent_id_space; -}; - -#endif // CKW_PROTOTYPE_EXAMPLES_COMMON_EXAMPLESCOPEDKERNELWRITER_H diff --git a/compute_kernel_writer/prototype/examples/writer_helper.cpp b/compute_kernel_writer/prototype/examples/writer_helper.cpp deleted file mode 100644 index 8623afbf50..0000000000 --- a/compute_kernel_writer/prototype/examples/writer_helper.cpp +++ /dev/null @@ -1,113 +0,0 @@ -/* -* Copyright (c) 2023 Arm Limited. 
-* -* SPDX-License-Identifier: MIT -* -* Permission is hereby granted, free of charge, to any person obtaining a copy -* of this software and associated documentation files (the "Software"), to -* deal in the Software without restriction, including without limitation the -* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -* sell copies of the Software, and to permit persons to whom the Software is -* furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in all -* copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. 
-*/ - -#include "ckw/KernelWriter.h" -#include "ckw/TensorTileSampler.h" - -#include "../include/ckw/KernelWriterHelper.h" -#include <iostream> - -using namespace ckw; - -TensorTileSampler create_simple_sampler(KernelWriter &writer) -{ - TensorTileSampler sampler; - - constexpr int32_t m0 = 1; - constexpr int32_t n0 = 1; - - auto &gid_0 = writer.declare_tile("gid_0", DataType::Int32); - auto &gid_1 = writer.declare_tile("gid_1", DataType::Int32); - auto &gid_2 = writer.declare_tile("gid_2", DataType::Int32); - - auto &const_0 = writer.declare_tile("0", 0); - - writer.op_get_global_id(gid_0, 0); - writer.op_get_global_id(gid_1, 1); - writer.op_get_global_id(gid_2, 2); - - sampler.x(gid_0); - sampler.y(gid_1); - sampler.z(gid_2); - sampler.b(const_0); - - sampler.width(n0); - sampler.height(m0); - - sampler.format(TensorSamplerFormat::C_WH_1); - sampler.address_mode_x(TensorSamplerAddressModeX::None); - sampler.address_mode_y(TensorSamplerAddressModeY::ClampToBorder); - sampler.address_mode_z(TensorSamplerAddressModeZ::Skip); - - return sampler; -} - -int main() -{ - Kernel kernel("test", GpuTargetLanguage::OpenCL); - KernelWriterHelper<KernelWriter> writer(kernel); - - const TensorInfo src_info(DataType::Fp32, TensorShape({1, 1, 1, 1, 1}), TensorDataLayout::Nhwc, 0); - const TensorInfo dst_info(DataType::Fp32, TensorShape({1, 1, 1, 1, 1}), TensorDataLayout::Nhwc, 1); - - auto &src_tensor = writer.declare_tensor_argument("src", src_info); - auto &dst_tensor = writer.declare_tensor_argument("dst", dst_info); - - const auto sampler = create_simple_sampler(writer); - - auto &src = writer.declare_tile("src_tile", TileInfo(src_tensor.data_type(), sampler.height(), sampler.width())); - auto &other = - writer.declare_tile("other_tile", TileInfo(src_tensor.data_type(), sampler.height(), sampler.width())); - auto &dst = writer.declare_tile("dst_tile", TileInfo(src_tensor.data_type(), sampler.height(), sampler.width())); - - writer.op_load(src, src_tensor, sampler); - 
writer.op_load(other, src_tensor, sampler); - writer.op_load(dst, dst_tensor, sampler); - - auto test = dst ^ src ^ other; - auto other_test = logical_and(dst, src, other); - writer.op_assign(dst, logical_and(dst, src, other)); - writer.op_assign(dst, test); - writer.op_assign(dst, other_test); - writer.op_assign(dst, operator^(operator^(dst, src), other)); - - writer.op_if(exp(src) == dst, [&] { writer.op_binary_expression(dst, src, BinaryOp::Add, src); }) - .op_else_if(exp(src) > dst, [&] { writer.op_binary_expression(dst, src, BinaryOp::Add, src); }) - .op_else([&] { writer.op_assign(dst, src); }); - - writer.op_assign(dst, src + src * src); - writer.op_assign(dst, src * max(src, dst) + src); - writer.op_assign(dst, src * select(src, dst, src) + src); - - writer.op_assign(dst, src ^ dst); - writer.op_assign(dst, ~src); - - writer.op_for_loop(dst < src, dst += src, [&] { writer.op_assign(dst, src + dst); }); - - writer.op_assign(dst += src); - writer.op_assign(dst += exp(src)); - - std::cout << "======== KERNEL ========" << std::endl; - std::cout << writer.generate_code() << std::endl; -} diff --git a/compute_kernel_writer/prototype/include/ckw/Error.h b/compute_kernel_writer/prototype/include/ckw/Error.h deleted file mode 100644 index aab713c817..0000000000 --- a/compute_kernel_writer/prototype/include/ckw/Error.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef CKW_PROTOTYPE_INCLUDE_CKW_ERROR_H -#define CKW_PROTOTYPE_INCLUDE_CKW_ERROR_H - -#include <stdexcept> -#include <string> - -namespace ckw -{ - -/** If the condition is not met, throw an std::runtime_error with the specified message. - * - * @param[in] cond The condition that is expected to be true. - * @param[in] msg The error message when the condition is not met. - */ -#define CKW_ASSERT_MSG(cond, msg) \ - do \ - { \ - if (!(cond)) \ - { \ - throw ::std::runtime_error(msg); \ - } \ - } while (false) - -/** If the condition is not met, throw an std::runtime_error. - * - * @param[in] cond The condition that is expected to be true. - */ -#define CKW_ASSERT(cond) CKW_ASSERT_MSG(cond, #cond) - -/** If the precondition is met but the consequence is not met, throw an std::runtime_error. 
- * - * @param[in] precond The condition if is met requires the consequence must also be met. - * @param[in] cond The condition that is expected to be true if the precondition is true. - */ -#define CKW_ASSERT_IF(precond, cond) CKW_ASSERT_MSG(!(precond) || ((precond) && (cond)), #precond " |-> " #cond) - -/** Mark the variables as unused. - * - * @param[in] ... Variables which are unused. - */ -#define CKW_UNUSED(...) ::ckw::ignore_unused(__VA_ARGS__) // NOLINT - -/** Mark the variables as unused. - * - * @param[in] ... Variables which are unused. - */ -template <typename... T> -inline void ignore_unused(T &&...) -{ -} - -} // namespace ckw - -#endif // CKW_INCLUDE_CKW_ERROR_H diff --git a/compute_kernel_writer/prototype/include/ckw/Kernel.h b/compute_kernel_writer/prototype/include/ckw/Kernel.h deleted file mode 100644 index ba31a29ba7..0000000000 --- a/compute_kernel_writer/prototype/include/ckw/Kernel.h +++ /dev/null @@ -1,102 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef CKW_PROTOTYPE_INCLUDE_CKW_KERNEL_H -#define CKW_PROTOTYPE_INCLUDE_CKW_KERNEL_H - -#include "ckw/KernelArgument.h" -#include "ckw/OperandBase.h" -#include "ckw/types/GpuTargetLanguage.h" - -#include <map> -#include <memory> -#include <string> -#include <vector> - -namespace ckw -{ - -class TileOperand; - -namespace prototype -{ -class GpuKernelWriterDataHolder; -} // namespace prototype - -/** The target for kernel writer to write into. */ -class Kernel -{ -public: - /** Constructor - * - * @param[in] language The programming language to write the kernel. - */ - Kernel(GpuTargetLanguage language); - /** Constructor - * - * @param[in] name The name of the kernel function. - * @param[in] language The programming language to write the kernel. - */ - Kernel(const char *name, GpuTargetLanguage language); - - /** Destructor */ - ~Kernel(); - - /** Get the name of the kernel function. */ - const std::string &name() const; - - /** Set the name of the kernel function. - * - * @param[in] name The name of the kernel function. - */ - void name(const std::string &name); - - /** Get the list of kernel arguments. */ - ::std::vector<KernelArgument> arguments() const; - - /** (Internal use only) Register the tile operand. - * - * @param operand The tile operand to be registered. - */ - TileOperand ®ister_operand(::std::unique_ptr<TileOperand> operand); - - /** (Internal use only) Register the tensor operand. - * - * @param operand The tensor operand to be registered. - */ - TensorOperand ®ister_operand(::std::unique_ptr<TensorOperand> operand); - - /** (Internal use only) Get the implementation data. 
*/ - prototype::GpuKernelWriterDataHolder *impl(); - -private: - ::std::string _name; - ::std::unique_ptr<prototype::GpuKernelWriterDataHolder> _kernel; - ::std::map<::std::string, ::std::unique_ptr<OperandBase>> _operands; - ::std::map<int32_t, TensorOperand *> _tensor_id_operands; -}; - -} // namespace ckw - -#endif // CKW_PROTOTYPE_INCLUDE_CKW_KERNEL_H diff --git a/compute_kernel_writer/prototype/include/ckw/KernelArgument.h b/compute_kernel_writer/prototype/include/ckw/KernelArgument.h deleted file mode 100644 index 3384a20aef..0000000000 --- a/compute_kernel_writer/prototype/include/ckw/KernelArgument.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef CKW_PROTOTYPE_INCLUDE_CKW_KERNELARGUMENT_H -#define CKW_PROTOTYPE_INCLUDE_CKW_KERNELARGUMENT_H - -#include "ckw/TensorInfo.h" - -#include <cstdint> - -namespace ckw -{ - -class TensorOperand; -class TensorComponentOperand; - -/** A kernel argument which can be either a tensor storage or a tensor component. */ -class KernelArgument -{ -public: - /** The type of kernel argument. */ - enum class Type : int32_t - { - /** The argument that provides the read and/or write access to the tensor data. - * - * See @ref ckw::TensorStorage to see the list of supported storage type. - */ - TensorStorage, - - /** The argument that provides extra information about the tensor. - * - * See @ref ckw::TensorComponent to see the list of supported component. - */ - TensorComponent, - }; - - /** Initialize a new instance of kernel argument class for a tensor storage argument. - * - * @param[in] tensor The tensor whose storage is exposed to kernel arguments. - */ - KernelArgument(TensorOperand &tensor); - - /** Initialize a new instance of kernel argument class for a tensor component argument. - * - * @param[in] tensor_component The tensor component to be exposed to kernel arguments. - */ - KernelArgument(TensorComponentOperand &tensor_component); - - /** Get the type of kernel argument. */ - Type type() const; - - /** Get the argument ID. - * - * This method can be used to get the tensor info ID of both tensor storage and tensor component arguments. - */ - int32_t id() const; - - /** Get the type of tensor storage. - * - * This method can only be used for tensor storage argument. - */ - TensorStorageType tensor_storage_type() const; - - /** Get the tensor component type. - * - * This method can only be used for tensor component argument. 
- */ - TensorComponentType tensor_component_type() const; - -private: - Type _type; - int32_t _id; - - union SubId - { - int32_t unknown; - TensorStorageType tensor_storage_type; - TensorComponentType tensor_component_type; - }; - - SubId _sub_id{0}; -}; - -} // namespace ckw - -#endif // CKW_PROTOTYPE_INCLUDE_CKW_KERNELARGUMENT_H diff --git a/compute_kernel_writer/prototype/include/ckw/KernelWriter.h b/compute_kernel_writer/prototype/include/ckw/KernelWriter.h deleted file mode 100644 index f9e0066f91..0000000000 --- a/compute_kernel_writer/prototype/include/ckw/KernelWriter.h +++ /dev/null @@ -1,338 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef CKW_PROTOTYPE_INCLUDE_CKW_KERNELWRITER_H -#define CKW_PROTOTYPE_INCLUDE_CKW_KERNELWRITER_H - -#include "ckw/Kernel.h" -#include "ckw/TensorInfo.h" -#include "ckw/TensorOperand.h" -#include "ckw/TileInfo.h" -#include "ckw/TileOperand.h" -#include "ckw/types/ConvertPolicy.h" -#include "ckw/types/Functions.h" -#include "ckw/types/Operators.h" - -#include <memory> - -namespace ckw -{ - -namespace prototype -{ -struct GpuKernelWriterAttribute; - -class IGpuKernelWriter; -} // namespace prototype - -/** Kernel writer. */ -class KernelWriter -{ -public: - // ============================================================================================= - // Constructors and destructor - // ============================================================================================= - - /** Initialize a new instance of kernel writer. - * - * @param[in] kernel The kernel to be written to. - */ - explicit KernelWriter(Kernel &kernel); - - /** Destructor */ - ~KernelWriter(); - - /** No copy constructor. */ - KernelWriter(const KernelWriter &) = delete; - - /** No copy assignment. */ - KernelWriter &operator=(const KernelWriter &) = delete; - - // ============================================================================================= - // Scope management - // ============================================================================================= - - /** Get the current ID space. */ - int32_t id_space() const; - - /** Set the current ID space. */ - KernelWriter &id_space(int32_t id_space); - - /** Switch to and return a new ID space. */ - int32_t next_id_space(); - - // ============================================================================================= - // Tensor and tile declaration - // ============================================================================================= - - /** Declare a tensor argument. - * - * @param[in] name The name of the tensor. - * @param[in] info The tensor info. 
- * @param[in] storage_type The tensor storage type. - * - * @return The @ref TensorOperand object. - */ - TensorOperand &declare_tensor_argument(const std::string &name, - const TensorInfo &info, - TensorStorageType storage_type = TensorStorageType::BufferUint8Ptr); - - /** Declare a compile-time constant scalar argument. - * - * @param[in] name The name of the tile. - * @param[in] value The value of the tile. - * - * @return The @ref TileOperand object. - */ - TileOperand &declare_tile_argument(const std::string &name, int32_t value); - - /** Declare a new tile. - * - * The name of the tile must be unique in the current ID space. - * - * @param[in] name The name of the tile. - * @param[in] ... The necessary arguments to create a new @ref TileOperand. - * - * @return The @ref TileOperand object. - */ - template <typename... TArgs> - TileOperand &declare_tile(const std::string &name, TArgs &&...args) - { - const auto var_name = generate_variable_name(name); - auto operand = std::make_unique<TileOperand>(var_name, ::std::forward<TArgs>(args)...); - - return declare_tile_operand(std::move(operand)); - } - - // ============================================================================================= - // Load and store - // ============================================================================================= - - /** Load the data from the tensor memory to the tile using the sampling information. - * - * @param[out] tile The tile to be loaded. - * @param[in] tensor The tensor to be read. - * @param[in] sampler The tensor sampling information. - * @param[in] dilation_y Dilation in the Y dimension. - */ - void op_load(TileOperand &tile, - const TensorOperand &tensor, - const TensorTileSampler &sampler, - const TileOperand &dilation_y = TileOperand("dil_y", 1)); - - /** Load the data from the tensor memory to the tile using the indirect buffer approach and respective of the sampling information. - * - * @param[out] tile The tile to be loaded. 
- * @param[in] tensor The tensor to be read. - * @param[in] sampler The tensor sampling information. - */ - void op_load_indirect(TileOperand &tile, const TensorOperand &tensor, const TensorTileSampler &sampler); - - /** Construct an indirection buffer in @p tile containing the precalculated addresses of elements in the source tensor. - * - * @param[out] tile The tile to be loaded. - * @param[in] tensor The tensor the be read. - * @param[in] sampler The tensor sampling information. - * @param[in] x The X coordinate. - * @param[in] y The Y coordinate. - * @param[in] x_off Offset in the X dimension. - * @param[in] y_off Offset in the Y dimension. - */ - void util_get_indirect_buffer(TileOperand &tile, - const TensorOperand &tensor, - const TensorTileSampler &sampler, - const TileOperand &x, - const TileOperand &y, - const TileOperand &x_off, - const TileOperand &y_off); - - /** Store the tile to the tensor using the specified sampling information. - * - * @param[out] dst The tensor that the tile is written to. - * @param[in] src The tile to be stored. - * @param[in] sampler The tensor sampling information. - */ - void op_store(TensorOperand &tensor, const TileOperand &tile, const TensorTileSampler &sampler); - - // ============================================================================================= - // Data processing - // ============================================================================================= - - /** Write assignment: `<dst> = <src>;`. - * - * @param[out] dst The destination tile. - * @param[in] src The source tile. - */ - void op_assign(const TileOperand &dst, const TileOperand &src); - - /** Write the cast: `<dst> = convert_<dst.type><_sat>(<src>);`. - * - * @param[out] dst The destination tile. - * @param[in] src The source tile. - * @param[in] policy The policy governing the behavior of the cast. 
- */ - void op_cast_expression(const TileOperand &dst, const TileOperand &src, ConvertPolicy policy); - - /** Write the unary expression: `<dst> = <op> <src>`. - * - * @param[out] dst The destination tile. - * @param[in] op The unary operator. - * @param[in] src The source tile. - */ - void op_unary_expression(const TileOperand &dst, UnaryOp op, const TileOperand &src); - - /** Write binary expression: `<dst> = <lhs> <op> <rhs>;`. - * - * @param[out] dst The destination tile. - * @param[in] lhs The LHS tile. - * @param[in] op The binary operator. - * @param[in] rhs The RHS tile. - */ - void op_binary_expression(const TileOperand &dst, const TileOperand &lhs, BinaryOp op, const TileOperand &rhs); - - /** Write function applied to scalar value: `<dst> = <func>(<src>);`. - * - * @param[out] dst The destination tile. - * @param[in] func The function to be applied to the source tile. - * @param[in] src The source tile. - */ - void op_unary_elementwise_function(const TileOperand &dst, UnaryFunction func, const TileOperand &src); - - /** Write function applied to scalar value: `<dst> = <func>(<first>, <second>);`. - * - * @param[out] dst The destination tile. - * @param[in] func The function to be applied to the source tiles. - * @param[in] first The first argument tile. - * @param[in] second The second argument tile. - */ - void op_binary_elementwise_function(const TileOperand &dst, - BinaryFunction func, - const TileOperand &first, - const TileOperand &second); - - /** Write function applied to scalar value: `<dst> = <func>(<first>, <second>, <third>);`. - * - * @param[out] dst The destination tile. - * @param[in] func The function to be applied to the source tiles. - * @param[in] first The first argument tile. - * @param[in] second The second argument tile. - * @param[in] third The third argument tile. 
- */ - void op_ternary_elementwise_function(const TileOperand &dst, - TernaryFunction func, - const TileOperand &first, - const TileOperand &second, - const TileOperand &third); - - /** Write if-statement: `if(<lhs> <op> <rhs>) { <body> }`. - * - * @param[in] lhs The LHS tile of the condition. - * @param[in] op The relational binary operator. - * @param[in] rhs The RHS tile of the condition. - * @param[in] body The body of the if-statement. - */ - void op_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body); - - /** Write else-if-statement: `else if(<lhs> <op> <rhs>) { <body> }`. - * - * @param[in] lhs The LHS tile of the condition. - * @param[in] op The relational binary operator. - * @param[in] rhs The RHS tile of the condition. - * @param[in] body The body of the else-if-statement. - */ - void op_else_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body); - - /** Write an else-statement: `else { <body> }`. - * - * @param[in] body The body of the else-statement. - */ - void op_else(const std::function<void()> &body); - - /** Write for-loops: `for(; <var> <cond_op> <cond_value>; <var> <update_op> <update_value>) { body }`. - * - * @param[in] var_name The name of the variable used in condition. - * @param[in] cond_op The relational binary operator used in condition. - * @param[in] cond_value_name The value which the variable is compared against. - * @param[in] update_var_name The name of the variable which is updated. - * @param[in] update_op The assignment operator used for updating the update value. - * @param[in, out] update_value The value which is updated at every iteration. - * @param[in] body The body of the for-loop. 
- */ - void op_for_loop(const TileOperand &var_name, - BinaryOp cond_op, - const TileOperand &cond_value_name, - const TileOperand &update_var_name, - AssignmentOp update_op, - const TileOperand &update_value_name, - const std::function<void()> &body); - - /** Write the return statement: `return;` - */ - void op_return(); - - // ============================================================================================= - // Misc - // ============================================================================================= - - /** Set `dst` the global ID of dimension `dim`. - * - * @param[out] dst The tile to be written to. - * @param[in] dim The global ID dimension. - */ - void op_get_global_id(const TileOperand &dst, int32_t dim); - - // ============================================================================================= - // Code generation - // ============================================================================================= - - /** Generate the source code of the kernel. */ - ::std::string generate_code(); - -private: - /** Generate the full variable name based on the original name and the ID space. - * - * @param[in] name The name of the variable. - * - * @return The full variable name. - */ - ::std::string generate_variable_name(const std::string &name) const; - - /** Declare the tile operand. - * - * @param[in] operand The tile operand to be declared. 
- */ - TileOperand &declare_tile_operand(std::unique_ptr<TileOperand> operand); - -private: - Kernel *_kernel; - ::std::unique_ptr<prototype::GpuKernelWriterAttribute> _impl_attr; - ::std::unique_ptr<prototype::IGpuKernelWriter> _impl; - - int32_t _id_space{0}; - int32_t _max_id_space{0}; -}; - -} // namespace ckw - -#endif // CKW_PROTOTYPE_INCLUDE_CKW_KERNELWRITER_H diff --git a/compute_kernel_writer/prototype/include/ckw/KernelWriterHelper.h b/compute_kernel_writer/prototype/include/ckw/KernelWriterHelper.h deleted file mode 100644 index 3ba079bbc2..0000000000 --- a/compute_kernel_writer/prototype/include/ckw/KernelWriterHelper.h +++ /dev/null @@ -1,1286 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef CKW_INCLUDE_CKW_KERNELWRITERHELPER_H -#define CKW_INCLUDE_CKW_KERNELWRITERHELPER_H - -#include "ckw/KernelWriter.h" -#include "ckw/TensorOperand.h" -#include "ckw/TileOperand.h" - -#include <iostream> -#include <type_traits> - -/* - * By including this header file you will be able to supplement the default - * Compute Kernel Writer API with additional syntax to help ease the use of CKW. - * - * To use the KernelWriterHelper you need to wrap your instance of KernelWriter - * (or any class deriving from KernelWriter): - * KernelWriterHelper<KernelWriter> writer; - * The resulting writer object comprises the original KernelWriter - * functionality (drop-in replacement), but extends the syntax as follows. - * - * Common functions/operators have natural syntax: - * 1. Unary expressions: - * writer.op_assign(dst, !src); // Logical NOT - * writer.op_assign(dst, ~src); // Bitwise NOT - * - * 2. Binary expressions: - * writer.op_assign(dst, lhs + rhs); // Addition - * writer.op_assign(dst, lhs - rhs); // Subtraction - * writer.op_assign(dst, lhs * rhs); // Multiplication - * writer.op_assign(dst, lhs / rhs); // Division - * writer.op_assign(dst, lhs % rhs); // Modulo - * writer.op_assign(dst, lhs == rhs); // Equality - * writer.op_assign(dst, lhs < rhs); // Less-than - * writer.op_assign(dst, lhs <= rhs); // Less-than-or-equal - * writer.op_assign(dst, lhs > rhs); // Greater-than - * writer.op_assign(dst, lhs >= rhs); // Greater-than-or-equal - * writer.op_assign(dst, lhs ^ rhs); // Bitwise XOR - * writer.op_assign(dst, logical_and(lhs, rhs)); // Logical AND - * writer.op_assign(dst, logical_or(lhs, rhs)); // Logical OR - * - * 3. 
Unary elementwise functions: - * writer.op_assign(dst, exp(src)); // Exponent - * writer.op_assign(dst, tanh(src)); // Hyperbolic tangent - * writer.op_assign(dst, sqrt(src)); // Square root - * writer.op_assign(dst, erf(src)); // Error function - * writer.op_assign(dst, fabs(src)); // Absolute of floating-point number - * writer.op_assign(dst, log(src)); // Natural logarithm - * writer.op_assign(dst, round(src)); // Round - * writer.op_assign(dst, sizeOf(src)); // sizeof - * - * 4. Binary elementwise functions: - * writer.op_assign(dst, max(first, second)); // Max - * writer.op_assign(dst, min(first, second)); // Min - * - * 5. Ternary elementwise functions: - * writer.op_assign(dst, select(first, second, third)); // Select - * - * NOTE: All the above examples support nesting, so you could write - * something like: writer.op_assign(dst, src * (log(arg) + sqrt(abs(arg))); - * - * - * 6. If-statements. The preceding syntax also allows easier writing of if-statements: - * writer.op_if(<cond>, <body>); - * - * For example: - * writer.op_if(exp(first_arg) == dst, [&]{ - * //... - * }).op_else_if(exp(first_arg) > dst, [&]{ - * //... - * }).op_else([&] { - * //... - * }); - * - * 7. For-loops. A similar syntax exists for for-loops: - * writer.op_for_loop(<cond>, <updater>, <body>); - * - * For example: - * writer.op_for_loop(index < limit, index += step, [&]{ - * //... - * }); - * - * NOTE: There are limitations on the for-loop <cond> and <updater> parameters. - * In neither the <cond> (Binary expression) or <updater> (Increment/Decrement) - * is it allowed to use nesting. For example, `(index + other) < limit` and - * `index < round(limit)` are invalid <cond> parameters. This is because the - * semantics of for-loops rely on the condition being evaluated at every iteration, - * but as temporary variables might be defined for nested expressions the semantics - * cannot be guaranteed. 
- */ - -namespace ckw -{ - -// ================================================== -// Type traits -// ================================================== - -/** Specifies if the type can be used as an operand for functions (e.g. max), operations (e.g. *), or assignments. */ -template <typename T> -struct can_be_operand : ::std::false_type -{ -}; - -/** Specifies if the type can be assigned/written to. */ -template <typename T> -struct can_be_assigned : ::std::false_type -{ -}; - -template <> -struct can_be_operand<TileOperand &> : ::std::true_type -{ -}; - -template <> -struct can_be_assigned<TileOperand &> : ::std::true_type -{ -}; - -// ================================================== -// Assignment -// ================================================== - -/** AST node for assignments. - * - * Note that \p TRight must be an operand, and \p TLeft must be assignable. - * - * @tparam TLeft The type of the destination of the assignment. - * @tparam TRight The type of the source assigned to the destination. - */ -template <typename TLeft, - typename TRight, - typename = ::std::enable_if<can_be_operand<TRight>::value && can_be_assigned<TLeft>::value>> -struct Assignment -{ - TLeft lhs; - TRight rhs; - AssignmentOp opcode; -}; - -/** Represents the expression: `\p lhs += \p rhs`. - * - * @tparam TLeft The type of the LHS of the assignment. - * @tparam TRight The type of the RHS of the assignment. - * @param[in] lhs The LHS of the assignment. - * @param[in] rhs The RHS of the assignment. - * @return The resulting AST node. - */ -template <typename TLeft, typename TRight> -inline Assignment<TLeft, TRight> operator+=(TLeft &&lhs, TRight &&rhs) -{ - return Assignment<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), AssignmentOp::Increment}; -} - -/** Represents the expression: `\p lhs -= \p rhs`. - * - * @tparam TLeft The type of the LHS of the assignment. - * @tparam TRight The type of the RHS of the assignment. 
- * @param[in] lhs The LHS of the assignment. - * @param[in] rhs The RHS of the assignment. - * @return The resulting AST node. - */ -template <typename TLeft, typename TRight> -inline Assignment<TLeft, TRight> operator-=(TLeft &&lhs, TRight &&rhs) -{ - return Assignment<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), AssignmentOp::Decrement}; -} - -// ================================================== -// Unary expression -// ================================================== - -/** AST node for unary expressions. - * - * Note that \p TSrc must be an operand. - * - * @tparam TSrc The type of the argument to the expression. - */ -template <typename TSrc, typename = ::std::enable_if<can_be_operand<TSrc>::value>> -struct UnaryExpression -{ - TSrc src; - UnaryOp opcode; -}; - -template <typename TLeft> -struct can_be_operand<UnaryExpression<TLeft>> : ::std::true_type -{ -}; - -/** Represents the expression: `!\p src`. - * - * @tparam TSrc The type of the argument. - * @param[in] src The argument. - * @return The resulting AST node. - */ -template <typename TSrc> -inline UnaryExpression<TSrc> operator!(TSrc &&src) -{ - return UnaryExpression<TSrc>{std::forward<TSrc>(src), UnaryOp::LogicalNot}; -} - -/** Represents the expression: `~\p src`. - * - * @tparam TSrc The type of the argument. - * @param[in] src The argument. - * @return The resulting AST node. - */ -template <typename TSrc> -inline UnaryExpression<TSrc> operator~(TSrc &&src) -{ - return UnaryExpression<TSrc>{std::forward<TSrc>(src), UnaryOp::BitwiseNot}; -} - -// ================================================== -// Binary expressions -// ================================================== - -/** AST node for binary expressions. - * - * Note that both \p TLeft and \p TRight must be operands. - * - * @tparam TLeft The type of the left argument of the expression. - * @tparam TRight The type of the right argument of the expression. 
- */ -template <typename TLeft, - typename TRight, - typename = ::std::enable_if_t<can_be_operand<TLeft>::value && can_be_operand<TRight>::value>> -struct BinaryExpression -{ - TLeft lhs; - TRight rhs; - BinaryOp opcode; -}; - -template <typename TLeft, typename TRight> -struct can_be_operand<BinaryExpression<TLeft, TRight>> : ::std::true_type -{ -}; - -/** Represents the expression: `\p lhs + \p rhs`. - * - * @tparam TLeft The type of the LHS of the expression. - * @tparam TRight The type of the RHS of the expression. - * @param[in] lhs The LHS of the expression. - * @param[in] rhs The RHS of the expression. - * @return The resulting AST node. - */ -template <typename TLeft, typename TRight> -inline BinaryExpression<TLeft, TRight> operator+(TLeft &&lhs, TRight &&rhs) -{ - return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::Add}; -} - -/** Represents the expression: `\p lhs - \p rhs`. - * - * @tparam TLeft The type of the LHS of the expression. - * @tparam TRight The type of the RHS of the expression. - * @param[in] lhs The LHS of the expression. - * @param[in] rhs The RHS of the expression. - * @return The resulting AST node. - */ -template <typename TLeft, typename TRight> -inline BinaryExpression<TLeft, TRight> operator-(TLeft &&lhs, TRight &&rhs) -{ - return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::Sub}; -} - -/** Represents the expression: `\p lhs * \p rhs`. - * - * @tparam TLeft The type of the LHS of the expression. - * @tparam TRight The type of the RHS of the expression. - * @param[in] lhs The LHS of the expression. - * @param[in] rhs The RHS of the expression. - * @return The resulting AST node. 
- */ -template <typename TLeft, typename TRight> -inline BinaryExpression<TLeft, TRight> operator*(TLeft &&lhs, TRight &&rhs) -{ - return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::Mul}; -} - -/** Represents the expression: `\p lhs / \p rhs`. - * - * @tparam TLeft The type of the LHS of the expression. - * @tparam TRight The type of the RHS of the expression. - * @param[in] lhs The LHS of the expression. - * @param[in] rhs The RHS of the expression. - * @return The resulting AST node. - */ -template <typename TLeft, typename TRight> -inline BinaryExpression<TLeft, TRight> operator/(TLeft &&lhs, TRight &&rhs) -{ - return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::Div}; -} - -/** Represents the expression: `\p lhs % \p rhs`. - * - * @tparam TLeft The type of the LHS of the expression. - * @tparam TRight The type of the RHS of the expression. - * @param[in] lhs The LHS of the expression. - * @param[in] rhs The RHS of the expression. - * @return The resulting AST node. - */ -template <typename TLeft, typename TRight> -inline BinaryExpression<TLeft, TRight> operator%(TLeft &&lhs, TRight &&rhs) -{ - return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::Mod}; -} - -/** Represents the expression: `\p lhs == \p rhs`. - * - * @tparam TLeft The type of the LHS of the expression. - * @tparam TRight The type of the RHS of the expression. - * @param[in] lhs The LHS of the expression. - * @param[in] rhs The RHS of the expression. - * @return The resulting AST node. - */ -template <typename TLeft, typename TRight> -inline BinaryExpression<TLeft, TRight> operator==(TLeft &&lhs, TRight &&rhs) -{ - return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::Equal}; -} - -/** Represents the expression: `\p lhs < \p rhs`. - * - * @tparam TLeft The type of the LHS of the expression. 
- * @tparam TRight The type of the RHS of the expression. - * @param[in] lhs The LHS of the expression. - * @param[in] rhs The RHS of the expression. - * @return The resulting AST node. - */ -template <typename TLeft, typename TRight> -inline BinaryExpression<TLeft, TRight> operator<(TLeft &&lhs, TRight &&rhs) -{ - return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::Less}; -} - -/** Represents the expression: `\p lhs <= \p rhs`. - * - * @tparam TLeft The type of the LHS of the expression. - * @tparam TRight The type of the RHS of the expression. - * @param[in] lhs The LHS of the expression. - * @param[in] rhs The RHS of the expression. - * @return The resulting AST node. - */ -template <typename TLeft, typename TRight> -inline BinaryExpression<TLeft, TRight> operator<=(TLeft &&lhs, TRight &&rhs) -{ - return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::LessEqual}; -} - -/** Represents the expression: `\p lhs > \p rhs`. - * - * @tparam TLeft The type of the LHS of the expression. - * @tparam TRight The type of the RHS of the expression. - * @param[in] lhs The LHS of the expression. - * @param[in] rhs The RHS of the expression. - * @return The resulting AST node. - */ -template <typename TLeft, typename TRight> -inline BinaryExpression<TLeft, TRight> operator>(TLeft &&lhs, TRight &&rhs) -{ - return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::Greater}; -} - -/** Represents the expression: `\p lhs >= \p rhs`. - * - * @tparam TLeft The type of the LHS of the expression. - * @tparam TRight The type of the RHS of the expression. - * @param[in] lhs The LHS of the expression. - * @param[in] rhs The RHS of the expression. - * @return The resulting AST node. 
- */ -template <typename TLeft, typename TRight> -inline BinaryExpression<TLeft, TRight> operator>=(TLeft &&lhs, TRight &&rhs) -{ - return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::GreaterEqual}; -} - -/** Represents the expression: `\p lhs ^ \p rhs`. - * - * @tparam TLeft The type of the LHS of the expression. - * @tparam TRight The type of the RHS of the expression. - * @param[in] lhs The LHS of the expression. - * @param[in] rhs The RHS of the expression. - * @return The resulting AST node. - */ -template <typename TLeft, typename TRight> -inline BinaryExpression<TLeft, TRight> operator^(TLeft &&lhs, TRight &&rhs) -{ - return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::BitwiseXOR}; -} - -/** Represents the expression: `\p lhs && \p rhs`. - * - * @tparam TLeft The type of the LHS of the expression. - * @tparam TRight The type of the RHS of the expression. - * @param[in] lhs The LHS of the expression. - * @param[in] rhs The RHS of the expression. - * @return The resulting AST node. - */ -template <typename TLeft, typename TRight> -inline BinaryExpression<TLeft, TRight> logical_and(TLeft &&lhs, TRight &&rhs) -{ - return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::LogicalAnd}; -} - -/** Represents the expression: `\p lhs && \p rhs`. - * - * @tparam TLeft The type of the LHS of the expression. - * @tparam TRight The type of the RHS of the expression. - * @param[in] lhs The LHS of the expression. - * @param[in] rhs The RHS of the expression. - * @return The resulting AST node. - */ -template <typename TLeft, typename TRight, typename... 
TOps> -inline BinaryExpression<BinaryExpression<TLeft, TRight>, TOps...> logical_and(TLeft &&lhs, TRight &&rhs, TOps &&...ops) -{ - return logical_and( - BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::LogicalAnd}, - std::forward<TOps>(ops)...); -} - -/** Represents the expression: `\p lhs || \p rhs`. - * - * @tparam TLeft The type of the LHS of the expression. - * @tparam TRight The type of the RHS of the expression. - * @param[in] lhs The LHS of the expression. - * @param[in] rhs The RHS of the expression. - * @return The resulting AST node. - */ -template <typename TLeft, typename TRight> -inline BinaryExpression<TLeft, TRight> logical_or(TLeft &&lhs, TRight &&rhs) -{ - return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::LogicalOr}; -} - -/** Represents the expression: `\p lhs || \p rhs`. - * - * @tparam TLeft The type of the LHS of the expression. - * @tparam TRight The type of the RHS of the expression. - * @param[in] lhs The LHS of the expression. - * @param[in] rhs The RHS of the expression. - * @return The resulting AST node. - */ -template <typename TLeft, typename TRight, typename... TOps> -inline BinaryExpression<BinaryExpression<TLeft, TRight>, TOps...> logical_or(TLeft &&lhs, TRight &&rhs, TOps &&...ops) -{ - return logical_or( - BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::LogicalOr}, - std::forward<TOps>(ops)...); -} - -// ================================================== -// Unary elementwise functions -// ================================================== - -/** AST node for unary elementwise functions. - * - * Note that \p TSrc must be an operand. - * - * @tparam TSrc The type of the argument to the function. 
- */ -template <typename TSrc, typename = ::std::enable_if<can_be_operand<TSrc>::value>> -struct UnaryElementwiseFunction -{ - TSrc src; - UnaryFunction opcode; -}; - -template <typename TLeft> -struct can_be_operand<UnaryElementwiseFunction<TLeft>> : ::std::true_type -{ -}; - -/** Represents the expression: `exp(\p src)`. - * - * @tparam TSrc The type of the argument. - * @param[in] src The argument. - * @return The resulting AST node. - */ -template <typename TSrc> -UnaryElementwiseFunction<TSrc> exp(TSrc &&src) -{ - return UnaryElementwiseFunction<TSrc>{std::forward<TSrc>(src), UnaryFunction::Exp}; -} - -/** Represents the expression: `tanh(\p src)`. - * - * @tparam TSrc The type of the argument. - * @param[in] src The argument. - * @return The resulting AST node. - */ -template <typename TSrc> -UnaryElementwiseFunction<TSrc> tanh(TSrc &&src) -{ - return UnaryElementwiseFunction<TSrc>{std::forward<TSrc>(src), UnaryFunction::Tanh}; -} - -/** Represents the expression: `sqrt(\p src)`. - * - * @tparam TSrc The type of the argument. - * @param[in] src The argument. - * @return The resulting AST node. - */ -template <typename TSrc> -UnaryElementwiseFunction<TSrc> sqrt(TSrc &&src) -{ - return UnaryElementwiseFunction<TSrc>{std::forward<TSrc>(src), UnaryFunction::Sqrt}; -} - -/** Represents the expression: `erf(\p src)`. - * - * @tparam TSrc The type of the argument. - * @param[in] src The argument. - * @return The resulting AST node. - */ -template <typename TSrc> -UnaryElementwiseFunction<TSrc> erf(TSrc &&src) -{ - return UnaryElementwiseFunction<TSrc>{std::forward<TSrc>(src), UnaryFunction::Erf}; -} - -/** Represents the expression: `fabs(\p src)`. - * - * @tparam TSrc The type of the argument. - * @param[in] src The argument. - * @return The resulting AST node. 
- */ -template <typename TSrc> -UnaryElementwiseFunction<TSrc> fabs(TSrc &&src) -{ - return UnaryElementwiseFunction<TSrc>{std::forward<TSrc>(src), UnaryFunction::Fabs}; -} - -/** Represents the expression: `log(\p src)`. - * - * @tparam TSrc The type of the argument. - * @param[in] src The argument. - * @return The resulting AST node. - */ -template <typename TSrc> -UnaryElementwiseFunction<TSrc> log(TSrc &&src) -{ - return UnaryElementwiseFunction<TSrc>{std::forward<TSrc>(src), UnaryFunction::Log}; -} - -/** Represents the expression: `round(\p src)`. - * - * @tparam TSrc The type of the argument. - * @param[in] src The argument. - * @return The resulting AST node. - */ -template <typename TSrc> -UnaryElementwiseFunction<TSrc> round(TSrc &&src) -{ - return UnaryElementwiseFunction<TSrc>{std::forward<TSrc>(src), UnaryFunction::Round}; -} - -/** Represents the expression: `sizeof(\p src)`. - * - * @tparam TSrc The type of the argument. - * @param[in] src The argument. - * @return The resulting AST node. - */ -template <typename TSrc> -UnaryElementwiseFunction<TSrc> sizeOf(TSrc &&src) -{ - return UnaryElementwiseFunction<TSrc>{std::forward<TSrc>(src), UnaryFunction::SizeOf}; -} - -// ================================================== -// Binary elementwise functions -// ================================================== - -/** AST node for binary elementwise functions. - * - * Note that both \p TFirst and \p TSecond must be operands. - * - * @tparam TFirst The type of the left argument of the function. - * @tparam TSecond The type of the right argument of the function. 
- */ -template <typename TFirst, - typename TSecond, - typename = ::std::enable_if<can_be_operand<TFirst>::value && can_be_operand<TSecond>::value>> -struct BinaryElementwiseFunction -{ - TFirst first; - TSecond second; - BinaryFunction opcode; -}; - -template <typename TFirst, typename TSecond> -struct can_be_operand<BinaryElementwiseFunction<TFirst, TSecond>> : ::std::true_type -{ -}; - -/** Represents the function call: `max(\p first, \p second)`. - * - * @tparam TFirst The type of the first argument. - * @tparam TSecond The type of the second argument. - * @param[in] first The first argument. - * @param[in] second The second argument. - * @return The resulting AST node. - */ -template <typename TFirst, typename TSecond> -BinaryElementwiseFunction<TFirst, TSecond> max(TFirst &&first, TSecond &&second) -{ - return BinaryElementwiseFunction<TFirst, TSecond>{std::forward<TFirst>(first), std::forward<TSecond>(second), - BinaryFunction::Max}; -} - -/** Represents the function call: `min(\p first, \p second)`. - * - * @tparam TFirst The type of the first argument. - * @tparam TSecond The type of the second argument. - * @param[in] first The first argument. - * @param[in] second The second argument. - * @return The resulting AST node. - */ -template <typename TFirst, typename TSecond> -BinaryElementwiseFunction<TFirst, TSecond> min(TFirst &&first, TSecond &&second) -{ - return BinaryElementwiseFunction<TFirst, TSecond>{std::forward<TFirst>(first), std::forward<TSecond>(second), - BinaryFunction::Min}; -} - -// ================================================== -// Ternary elementwise functions -// ================================================== - -/** AST node for ternary elementwise functions. - * - * Note that \p TFirst, \p TSecond, and \p TThird all must be operands. - * - * @tparam TFirst The type of the first argument to the function. - * @tparam TSecond The type of the second argument to the function. 
- * @tparam TThird The type of the third argument to the function. - */ -template <typename TFirst, - typename TSecond, - typename TThird, - typename = ::std::enable_if<can_be_operand<TFirst>::value && can_be_operand<TSecond>::value && - can_be_operand<TThird>::value>> -struct TernaryElementwiseFunction -{ - TFirst first; - TSecond second; - TThird third; - TernaryFunction opcode; -}; - -template <typename TFirst, typename TSecond, typename TThird> -struct can_be_operand<TernaryElementwiseFunction<TFirst, TSecond, TThird>> : ::std::true_type -{ -}; - -/** Represents the function call: `select(\p first, \p second, \p third)`. - * - * @tparam TFirst The type of the first argument. - * @tparam TSecond The type of the second argument. - * @tparam TThird The type of the third argument. - * @param[in] first The first argument. - * @param[in] second The second argument. - * @param[in] third The third argument. - * @return The resulting AST node. - */ -template <typename TFirst, typename TSecond, typename TThird> -TernaryElementwiseFunction<TFirst, TSecond, TThird> select(TFirst &&first, TSecond &&second, TThird &&third) -{ - return TernaryElementwiseFunction<TFirst, TSecond, TThird>{std::forward<TFirst>(first), - std::forward<TSecond>(second), - std::forward<TThird>(third), TernaryFunction::Select}; -} - -/** Helper class used to extend a KernelWriter with additional functionality - * in order to make writing easier. - * - * This extension automatically handles creation of temporary variables, and - * allows nested function calls and operations. - * - * @tparam TWriter The type of KernelWriter to be overloaded. This must inherit from KernelWriter. 
- */ -template <class TWriter, typename = std::enable_if<std::is_base_of<KernelWriter, TWriter>::value>> -class KernelWriterHelper : public TWriter -{ -public: - using TWriter::TWriter; - - // ================================================== - // If-statements - // ================================================== - - // Un-hide original implementation, in case the original implementation is required. - using TWriter::op_if; - - /** Represents the if-statement: `if(\p cond) { \p body }`. - * - * The BinaryExpression is unpacked and its components are forwarded to - * the underlying KernelWriter's implementation. - * - * @param[in] cond The BinaryExpression representing the condition. - * @param[in] body The body of the if-statement. - */ - KernelWriterHelper<TWriter> &op_if(const BinaryExpression<TileOperand &, TileOperand &> &cond, - const std::function<void()> &body) - { - TWriter::op_if(cond.lhs, cond.opcode, cond.rhs, body); - return *this; - } - - /** Represents the if-statement: `if(\p cond) { \p body }`. - * - * The BinaryExpression is unpacked and its components are forwarded to - * the underlying KernelWriter's implementation. - * - * @param[in] cond The BinaryExpression representing the condition. - * @param[in] body The body of the if-statement. - */ - template <typename TRight> - KernelWriterHelper<TWriter> &op_if(const BinaryExpression<TileOperand &, TRight> &cond, - const std::function<void()> &body) - { - auto &tmp1 = declare_temp_tile(cond.lhs.tile_info()); - op_assign(tmp1, cond.rhs); - TWriter::op_if(cond.lhs, cond.opcode, tmp1, body); - return *this; - } - - /** Represents the if-statement: `if(\p cond) { \p body }`. - * - * The BinaryExpression is unpacked and its components are forwarded to - * the underlying KernelWriter's implementation. - * - * @param[in] cond The BinaryExpression representing the condition. - * @param[in] body The body of the if-statement. 
- */ - template <typename TLeft> - KernelWriterHelper<TWriter> &op_if(const BinaryExpression<TLeft, TileOperand &> &cond, - const std::function<void()> &body) - { - auto &tmp1 = declare_temp_tile(cond.rhs.tile_info()); - op_assign(tmp1, cond.lhs); - TWriter::op_if(tmp1, cond.opcode, cond.rhs, body); - return *this; - } - - // Un-hide original implementation, in case the original implementation is required. - using TWriter::op_else_if; - - /** Represents the else-if-statement: `else if(\p cond) { \p body }`. - * - * The BinaryExpression is unpacked and its components are forwarded to - * the underlying KernelWriter's implementation. - * - * @param[in] cond The BinaryExpression representing the condition. - * @param[in] body The body of the else-if-statement. - */ - KernelWriterHelper<TWriter> &op_else_if(const BinaryExpression<TileOperand &, TileOperand &> &cond, - const std::function<void()> &body) - { - TWriter::op_else_if(cond.lhs, cond.opcode, cond.rhs, body); - return *this; - } - - /** Represents the else-if-statement: `else if(\p cond) { \p body }`. - * - * The BinaryExpression is unpacked and its components are forwarded to - * the underlying KernelWriter's implementation. - * - * @param[in] cond The BinaryExpression representing the condition. - * @param[in] body The body of the else-if-statement. - */ - template <typename TRight> - KernelWriterHelper<TWriter> &op_else_if(const BinaryExpression<TileOperand &, TRight> &cond, - const std::function<void()> &body) - { - auto &tmp1 = declare_temp_tile(cond.lhs.tile_info()); - op_assign(tmp1, cond.rhs); - TWriter::op_else_if(cond.lhs, cond.opcode, tmp1, body); - return *this; - } - - /** Represents the else-if-statement: `else if(\p cond) { \p body }`. - * - * The BinaryExpression is unpacked and its components are forwarded to - * the underlying KernelWriter's implementation. - * - * @param[in] cond The BinaryExpression representing the condition. - * @param[in] body The body of the else-if-statement. 
- */ - template <typename TLeft> - KernelWriterHelper<TWriter> &op_else_if(const BinaryExpression<TLeft, TileOperand &> &cond, - const std::function<void()> &body) - { - auto &tmp1 = declare_temp_tile(cond.rhs.tile_info()); - op_assign(tmp1, cond.lhs); - TWriter::op_else_if(tmp1, cond.opcode, cond.rhs, body); - return *this; - } - - // ================================================== - // For-loops - // ================================================== - - // Un-hide original implementation, in case the original implementation is required. - using TWriter::op_for_loop; - - /** Represents the for-loop: `for(;\p cond; \p updater) { \p body }`. - * - * The BinaryExpression for the condition and the Assignment - * for the updater are unpacked and their components are forwarded to - * the underlying KernelWriter's implementation. - * - * @param[in] cond The BinaryExpression representing the condition. - * @param[in] updater The Assignment representing the updater. - * @param[in] body The body of the for-loop. - */ - void op_for_loop(const BinaryExpression<TileOperand &, TileOperand &> &cond, - const Assignment<TileOperand &, TileOperand &> &updater, - const std::function<void()> &body) - { - TWriter::op_for_loop(cond.lhs, cond.opcode, cond.rhs, updater.lhs, updater.opcode, updater.rhs, body); - } - - // ================================================== - // Unary expressions - // ================================================== - - // Un-hide original implementation, in case the original implementation is required. - using TWriter::op_assign; - - /** Represents the assignment: `\p dst = \p exp`. - * - * The UnaryExpression is unpacked and its components are forwarded to - * the underlying KernelWriter's implementation. - * - * @param[in] dst The tile which is assigned to. - * @param[in] exp The UnaryExpression representing the expression to be evaluated and assigned. 
- */ - void op_assign(const TileOperand &dst, const UnaryExpression<TileOperand &> &exp) - { - TWriter::op_unary_expression(dst, exp.opcode, exp.src); - } - - // ================================================== - // Binary expressions - // ================================================== - - /** Represents the assignment: `\p dst = \p exp`. - * - * The BinaryExpression is unpacked and its components are forwarded to - * the underlying KernelWriter's implementation. - * - * @param[in] dst The tile which is assigned to. - * @param[in] exp The BinaryExpression representing the expression to be evaluated and assigned. - */ - void op_assign(const TileOperand &dst, const BinaryExpression<TileOperand &, TileOperand &> &exp) - { - TWriter::op_binary_expression(dst, exp.lhs, exp.opcode, exp.rhs); - } - - /** Represents the assignment: `\p dst = \p exp`. - * - * The BinaryExpression is unpacked and its components are forwarded to - * the underlying KernelWriter's implementation. - * - * @param[in] dst The tile which is assigned to. - * @param[in] exp The BinaryExpression representing the expression to be evaluated and assigned. - */ - template <typename TRight> - void op_assign(const TileOperand &dst, const BinaryExpression<TileOperand &, TRight> &exp) - { - std::cout << "Beginning assignment!" << std::endl; - auto &tmp1 = declare_temp_tile(dst.tile_info()); - op_assign(tmp1, exp.rhs); - TWriter::op_binary_expression(dst, exp.lhs, exp.opcode, tmp1); - } - - /** Represents the assignment: `\p dst = \p exp`. - * - * The BinaryExpression is unpacked and its components are forwarded to - * the underlying KernelWriter's implementation. - * - * @param[in] dst The tile which is assigned to. - * @param[in] exp The BinaryExpression representing the expression to be evaluated and assigned. - */ - template <typename TLeft> - void op_assign(const TileOperand &dst, const BinaryExpression<TLeft, TileOperand &> &exp) - { - std::cout << "Beginning assignment!" 
<< std::endl; - auto &tmp1 = declare_temp_tile(dst.tile_info()); - op_assign(tmp1, exp.lhs); - TWriter::op_binary_expression(dst, tmp1, exp.opcode, exp.rhs); - } - - /** Represents the assignment: `\p dst = \p exp`. - * - * The BinaryExpression is unpacked and its components are forwarded to - * the underlying KernelWriter's implementation. - * - * @param[in] dst The tile which is assigned to. - * @param[in] exp The BinaryExpression representing the expression to be evaluated and assigned. - */ - template <typename TLeft, typename TRight> - void op_assign(const TileOperand &dst, const BinaryExpression<TLeft, TRight> &exp) - { - auto &tmp1 = declare_temp_tile(dst.tile_info()); - auto &tmp2 = declare_temp_tile(dst.tile_info()); - op_assign(tmp1, exp.lhs); - op_assign(tmp2, exp.rhs); - TWriter::op_binary_expression(dst, tmp1, exp.opcode, tmp2); - } - - // ================================================== - // Unary elementwise functions - // ================================================== - - /** Represents the assignment: `\p dst = \p exp`. - * - * The UnaryElementwiseFunction is unpacked and its components are forwarded to - * the underlying KernelWriter's implementation. - * - * @param[in] dst The tile which is assigned to. - * @param[in] exp The UnaryElementwiseFunction representing the expression to be evaluated and assigned. - */ - void op_assign(const TileOperand &dst, const UnaryElementwiseFunction<TileOperand &> &exp) - { - TWriter::op_unary_elementwise_function(dst, exp.opcode, exp.src); - } - - /** Represents the assignment: `\p dst = \p exp`. - * - * The UnaryElementwiseFunction is unpacked and its components are forwarded to - * the underlying KernelWriter's implementation. - * - * @param[in] dst The tile which is assigned to. - * @param[in] exp The UnaryElementwiseFunction representing the expression to be evaluated and assigned. 
- */ - template <typename TArg> - void op_assign(const TileOperand &dst, const UnaryElementwiseFunction<TArg> &exp) - { - auto &tmp1 = declare_temp_tile(dst.tile_info()); - op_assign(tmp1, exp.lhs); - TWriter::op_unary_elementwise_function(dst, exp.opcode, tmp1); - } - - // ================================================== - // Binary elementwise functions - // ================================================== - - /** Represents the assignment: `\p dst = \p exp`. - * - * The BinaryElementwiseFunction is unpacked and its components are forwarded to - * the underlying KernelWriter's implementation. - * - * @param[in] dst The tile which is assigned to. - * @param[in] exp The BinaryElementwiseFunction representing the expression to be evaluated and assigned. - */ - void op_assign(const TileOperand &dst, const BinaryElementwiseFunction<TileOperand &, TileOperand &> &exp) - { - TWriter::op_binary_elementwise_function(dst, exp.opcode, exp.first, exp.second); - } - - /** Represents the assignment: `\p dst = \p exp`. - * - * The BinaryElementwiseFunction is unpacked and its components are forwarded to - * the underlying KernelWriter's implementation. - * - * @param[in] dst The tile which is assigned to. - * @param[in] exp The BinaryElementwiseFunction representing the expression to be evaluated and assigned. - */ - template <typename TRight> - void op_assign(const TileOperand &dst, const BinaryElementwiseFunction<TileOperand &, TRight> &exp) - { - auto &tmp1 = declare_temp_tile(dst.tile_info()); - op_assign(tmp1, exp.second); - TWriter::op_binary_elementwise_function(dst, exp.opcode, exp.first, tmp1); - } - - /** Represents the assignment: `\p dst = \p exp`. - * - * The BinaryElementwiseFunction is unpacked and its components are forwarded to - * the underlying KernelWriter's implementation. - * - * @param[in] dst The tile which is assigned to. - * @param[in] exp The BinaryElementwiseFunction representing the expression to be evaluated and assigned. 
- */ - template <typename TLeft> - void op_assign(const TileOperand &dst, const BinaryElementwiseFunction<TLeft, TileOperand &> &exp) - { - auto &tmp1 = declare_temp_tile(dst.tile_info()); - op_assign(tmp1, exp.first); - TWriter::op_binary_elementwise_function(dst, exp.opcode, tmp1, exp.second); - } - - /** Represents the assignment: `\p dst = \p exp`. - * - * The BinaryElementwiseFunction is unpacked and its components are forwarded to - * the underlying KernelWriter's implementation. - * - * @param[in] dst The tile which is assigned to. - * @param[in] exp The BinaryElementwiseFunction representing the expression to be evaluated and assigned. - */ - template <typename TLeft, typename TRight> - void op_assign(const TileOperand &dst, const BinaryElementwiseFunction<TLeft, TRight> &exp) - { - auto &tmp1 = declare_temp_tile(dst.tile_info()); - auto &tmp2 = declare_temp_tile(dst.tile_info()); - op_assign(tmp1, exp.first); - op_assign(tmp2, exp.second); - TWriter::op_binary_elementwise_function(dst, exp.opcode, tmp1, tmp2); - } - - // ================================================== - // Ternary elementwise functions - // ================================================== - - /** Represents the assignment: `\p dst = \p exp`. - * - * The TernaryElementwiseFunction is unpacked and its components are forwarded to - * the underlying KernelWriter's implementation. - * - * @param[in] dst The tile which is assigned to. - * @param[in] exp The TernaryElementwiseFunction representing the expression to be evaluated and assigned. - */ - void op_assign(const TileOperand &dst, - const TernaryElementwiseFunction<TileOperand &, TileOperand &, TileOperand &> &exp) - { - TWriter::op_ternary_elementwise_function(dst, exp.opcode, exp.first, exp.second, exp.third); - } - - /** Represents the assignment: `\p dst = \p exp`. - * - * The TernaryElementwiseFunction is unpacked and its components are forwarded to - * the underlying KernelWriter's implementation. 
- * - * @param[in] dst The tile which is assigned to. - * @param[in] exp The TernaryElementwiseFunction representing the expression to be evaluated and assigned. - */ - template <typename TFirst> - void op_assign(const TileOperand &dst, const TernaryElementwiseFunction<TFirst, TileOperand &, TileOperand &> &exp) - { - auto &tmp1 = declare_temp_tile(dst.tile_info()); - op_assign(tmp1, exp.first); - TWriter::op_ternary_elementwise_function(dst, exp.opcode, tmp1, exp.second, exp.third); - } - - /** Represents the assignment: `\p dst = \p exp`. - * - * The TernaryElementwiseFunction is unpacked and its components are forwarded to - * the underlying KernelWriter's implementation. - * - * @param[in] dst The tile which is assigned to. - * @param[in] exp The TernaryElementwiseFunction representing the expression to be evaluated and assigned. - */ - template <typename TSecond> - void op_assign(const TileOperand &dst, const TernaryElementwiseFunction<TileOperand &, TSecond, TileOperand &> &exp) - { - auto &tmp1 = declare_temp_tile(dst.tile_info()); - op_assign(tmp1, exp.second); - TWriter::op_ternary_elementwise_function(dst, exp.opcode, exp.first, tmp1, exp.third); - } - - /** Represents the assignment: `\p dst = \p exp`. - * - * The TernaryElementwiseFunction is unpacked and its components are forwarded to - * the underlying KernelWriter's implementation. - * - * @param[in] dst The tile which is assigned to. - * @param[in] exp The TernaryElementwiseFunction representing the expression to be evaluated and assigned. - */ - template <typename TThird> - void op_assign(const TileOperand &dst, const TernaryElementwiseFunction<TileOperand &, TileOperand &, TThird> &exp) - { - auto &tmp1 = declare_temp_tile(dst.tile_info()); - op_assign(tmp1, exp.third); - TWriter::op_ternary_elementwise_function(dst, exp.opcode, exp.first, exp.second, tmp1); - } - - /** Represents the assignment: `\p dst = \p exp`. 
- * - * The TernaryElementwiseFunction is unpacked and its components are forwarded to - * the underlying KernelWriter's implementation. - * - * @param[in] dst The tile which is assigned to. - * @param[in] exp The TernaryElementwiseFunction representing the expression to be evaluated and assigned. - */ - template <typename TFirst, typename TSecond> - void op_assign(const TileOperand &dst, const TernaryElementwiseFunction<TFirst, TSecond, TileOperand &> &exp) - { - auto &tmp1 = declare_temp_tile(dst.tile_info()); - auto &tmp2 = declare_temp_tile(dst.tile_info()); - op_assign(tmp1, exp.first); - op_assign(tmp2, exp.second); - TWriter::op_ternary_elementwise_function(dst, exp.opcode, tmp1, tmp2, exp.third); - } - - /** Represents the assignment: `\p dst = \p exp`. - * - * The TernaryElementwiseFunction is unpacked and its components are forwarded to - * the underlying KernelWriter's implementation. - * - * @param[in] dst The tile which is assigned to. - * @param[in] exp The TernaryElementwiseFunction representing the expression to be evaluated and assigned. - */ - template <typename TFirst, typename TThird> - void op_assign(const TileOperand &dst, const TernaryElementwiseFunction<TFirst, TileOperand &, TThird> &exp) - { - auto &tmp1 = declare_temp_tile(dst.tile_info()); - auto &tmp2 = declare_temp_tile(dst.tile_info()); - op_assign(tmp1, exp.first); - op_assign(tmp2, exp.third); - TWriter::op_ternary_elementwise_function(dst, exp.opcode, tmp1, exp.second, tmp2); - } - - /** Represents the assignment: `\p dst = \p exp`. - * - * The TernaryElementwiseFunction is unpacked and its components are forwarded to - * the underlying KernelWriter's implementation. - * - * @param[in] dst The tile which is assigned to. - * @param[in] exp The TernaryElementwiseFunction representing the expression to be evaluated and assigned. 
- */ - template <typename TSecond, typename TThird> - void op_assign(const TileOperand &dst, const TernaryElementwiseFunction<TileOperand &, TSecond, TThird> &exp) - { - auto &tmp1 = declare_temp_tile(dst.tile_info()); - auto &tmp2 = declare_temp_tile(dst.tile_info()); - op_assign(tmp1, exp.second); - op_assign(tmp2, exp.third); - TWriter::op_ternary_elementwise_function(dst, exp.opcode, exp.first, tmp1, tmp2); - } - - /** Represents the assignment: `\p dst = \p exp`. - * - * The TernaryElementwiseFunction is unpacked and its components are forwarded to - * the underlying KernelWriter's implementation. - * - * @param[in] dst The tile which is assigned to. - * @param[in] exp The TernaryElementwiseFunction representing the expression to be evaluated and assigned. - */ - template <typename TFirst, typename TSecond, typename TThird> - void op_assign(const TileOperand &dst, const TernaryElementwiseFunction<TFirst, TSecond, TThird> &exp) - { - auto &tmp1 = declare_temp_tile(dst.tile_info(), dst.tile_info(), dst.tile_info()); - auto &tmp2 = declare_temp_tile(dst.tile_info()); - auto &tmp3 = declare_temp_tile(dst.tile_info()); - op_assign(tmp1, exp.first); - op_assign(tmp2, exp.second); - op_assign(tmp3, exp.third); - TWriter::op_ternary_elementwise_function(dst, exp.opcode, tmp1, tmp2, tmp3); - } - - // ================================================== - // Assignments - // ================================================== - - /** Represents the assignment: `\p lhs += \p rhs` or `\p lhs -= \p rhs`. - * - * The Assignment is unpacked and its components are forwarded to - * the underlying KernelWriter's implementation. - * - * @param[in] exp The Assignment representing the expression to be evaluated. 
- */ - void op_assign(const Assignment<TileOperand &, TileOperand &> &exp) - { - if (exp.opcode == AssignmentOp::Increment) - { - TWriter::op_binary_expression(exp.lhs, exp.lhs, BinaryOp::Add, exp.rhs); - } - else if (exp.opcode == AssignmentOp::Decrement) - { - TWriter::op_binary_expression(exp.lhs, exp.lhs, BinaryOp::Sub, exp.rhs); - } - } - - /** Represents the assignment: `\p lhs += \p rhs` or `\p lhs -= \p rhs`. - * - * The Assignment is unpacked and its components are forwarded to - * the underlying KernelWriter's implementation. - * - * @tparam TRight The type of the RHS of the assignment. - * @param[in] exp The Assignment representing the expression to be evaluated. - */ - template <typename TRight> - void op_assign(const Assignment<TileOperand &, TRight> &exp) - { - auto &tmp1 = declare_temp_tile(exp.lhs.tile_info()); - op_assign(tmp1, exp.rhs); - op_assign(Assignment<TileOperand &, TileOperand &>{exp.lhs, tmp1, exp.opcode}); - } - -private: - unsigned int temp_var_counter = 0; - - /** Return the current counter value, then increment it. - * - * @return The current counter value. - */ - int next_ctr() - { - return temp_var_counter++; - } - - /** Gets the next temporary variable counter value, - * and returns a suitable temporary variable name. - * - * @return A temporary variable name. - */ - std::string next_tmp_var_name() - { - return "tmp_" + std::to_string(next_ctr()); - } - - /** Returns the argument. - * - * Used for recursion with the variadic function version of this function. - * - * @param[in] arg The TileInfo to return. - * @return The \p arg. - */ - TileInfo get_largest_size(const TileInfo &arg) - { - return arg; - } - - /** Returns a TileInfo object where the size in each dimension (width, height) is the largest - * of either TileInfo argument in the corresponding dimension. - * - * @tparam TOps Must be of TileInfo type. - * @param[in] first A TileInfo object. - * @param[in] second A TileInfo object. 
- * @param[in] ops A number of TileInfo objects. - * @return A TileInfo object which represents the largest shape in each dimension across the arguments. - */ - template <typename... TOps, typename = ::std::enable_if_t<std::is_same<TOps..., TileInfo>::value>> - TileInfo get_largest_size(const TileInfo &first, const TileInfo &second, const TOps &...ops) - { - TileInfo largest = {first.data_type(), std::max(first.width(), second.width()), - std::max(first.height(), second.height())}; - return get_largest_size(largest, ops...); - } - - /** Helper function to define a suitable TileOperand with appropriate TileInfo - * such that broadcasting is taken into account, based on the arguments provided. - * - * @tparam TArgs Must be of TileInfo type. - * @param[in] args A number of TileInfo which determine the shape of the TileOperand to declare. - * @return A newly created TileOperand. - */ - template <typename... TArgs, typename = ::std::enable_if_t<std::is_same<TArgs..., TileInfo>::value>> - TileOperand &declare_temp_tile(const TArgs &...args) - { - return TWriter::declare_tile(next_tmp_var_name().c_str(), get_largest_size(args...)); - } -}; - -} // namespace ckw - -#endif // CKW_INCLUDE_CKW_KERNELWRITERHELPER_H diff --git a/compute_kernel_writer/prototype/include/ckw/OperandBase.h b/compute_kernel_writer/prototype/include/ckw/OperandBase.h deleted file mode 100644 index 9842127339..0000000000 --- a/compute_kernel_writer/prototype/include/ckw/OperandBase.h +++ /dev/null @@ -1,78 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef CKW_PROTOTYPE_INCLUDE_CKW_OPERANDBASE_H -#define CKW_PROTOTYPE_INCLUDE_CKW_OPERANDBASE_H - -#include "ckw/types/DataType.h" - -#include <string> - -namespace ckw -{ -namespace prototype -{ -class IGpuKernelWriter; - -class Operand; -} // namespace prototype - -/** The base class for all operands. */ -class OperandBase -{ -public: - /** Constructor - * - * @param[in] name The name of the operand. - */ - explicit OperandBase(const ::std::string &name); - - /** Destructor */ - virtual ~OperandBase(); - - /** (Internal use only) Create the implementation operand. - * - * @param[in] writer The implementation kernel writer. - */ - virtual prototype::Operand create_impl_operand(prototype::IGpuKernelWriter *writer) const = 0; - - /** Get the name of the operand. */ - const ::std::string &name() const; - - /** Set the name of the operand. 
*/ - OperandBase &name(const ::std::string &name); - - /** Get the data type of the operand. */ - virtual DataType data_type() const = 0; - - /** Get whether the operand is compile-time constant. */ - virtual bool is_constant() const = 0; - -private: - ::std::string _name; -}; - -} // namespace ckw - -#endif // CKW_PROTOTYPE_INCLUDE_CKW_OPERANDBASE_H diff --git a/compute_kernel_writer/prototype/include/ckw/ScalarValue.h b/compute_kernel_writer/prototype/include/ckw/ScalarValue.h deleted file mode 100644 index 2a9c42acc8..0000000000 --- a/compute_kernel_writer/prototype/include/ckw/ScalarValue.h +++ /dev/null @@ -1,137 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef CKW_PROTOTYPE_INCLUDE_CKW_SCALARVALUE_H -#define CKW_PROTOTYPE_INCLUDE_CKW_SCALARVALUE_H - -#include "ckw/Error.h" - -#include <cstdint> - -namespace ckw -{ - -/** The scalar value known at compile-time. */ -class ScalarValue -{ -public: - /** Initialize a new instance of @ref ScalarValue class with integer value 0. */ - ScalarValue() - { - _type = Type::INT; - _value.i64 = 0; - } - - /** Initialize a new instance of @ref ScalarValue class with the specified value. */ - template <typename T> - ScalarValue(T value) - { - set(value); - } - - /** Set the value. */ - template <typename T> - void set(T value) - { - CKW_ASSERT(::std::is_integral<T>::value || ::std::is_floating_point<T>::value); - CKW_ASSERT(sizeof(T) <= 8); - - _size = sizeof(T); - - if (::std::is_integral<T>::value) - { - if (::std::is_signed<T>::value) - { - _type = Type::INT; - _value.i64 = value; - } - else - { - _type = Type::UINT; - _value.u64 = value; - } - } - else - { - _type = Type::FLOAT; - _value.f64 = value; - } - } - - /** Get the value. - * - * The caller must make sure that what has been stored in the object must fit - * the output data type without data corruption or loss of accuracy. 
- */ - template <typename T> - T get() const - { - CKW_ASSERT(::std::is_integral<T>::value || ::std::is_floating_point<T>::value); - CKW_ASSERT(sizeof(T) >= _size); - - if (::std::is_integral<T>::value) - { - if (::std::is_signed<T>::value) - { - CKW_ASSERT(_type == Type::INT || _type == Type::UINT); - CKW_ASSERT_IF(_type == Type::UINT, sizeof(T) > _size); - - return _value.i64; - } - else - { - CKW_ASSERT(_type == Type::INT); - - return _value.u64; - } - } - else - { - return _value.f64; - } - } - -private: - union Value - { - int64_t i64; - uint64_t u64; - double f64; - }; - - enum class Type : int32_t - { - UINT, - INT, - FLOAT, - }; - - Value _value{}; - Type _type{}; - uint32_t _size{}; -}; - -} // namespace ckw - -#endif // CKW_PROTOTYPE_INCLUDE_CKW_SCALARVALUE_H diff --git a/compute_kernel_writer/prototype/include/ckw/TensorInfo.h b/compute_kernel_writer/prototype/include/ckw/TensorInfo.h deleted file mode 100644 index 24da7dc8ab..0000000000 --- a/compute_kernel_writer/prototype/include/ckw/TensorInfo.h +++ /dev/null @@ -1,153 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef CKW_PROTOTYPE_INCLUDE_CKW_TENSORINFO_H -#define CKW_PROTOTYPE_INCLUDE_CKW_TENSORINFO_H - -#include "ckw/types/DataType.h" - -#include <array> -#include <cstdint> - -namespace ckw -{ -/** Compute Kernel Writer tensor data layout (or memory format) */ -enum class TensorDataLayout -{ - Unknown, - Nhwc, - Ndhwc -}; - -/** Compute Kernel Writer tensor data layout component */ -enum class TensorDataLayoutComponent -{ - Unknown, - N, - D, - H, - W, - C, -}; - -/** Compute Kernel Writer tensor component bitmask. The bitmask can be used to retrieve - * the info from @ref TensorComponent. - */ -enum class TensorComponentBitmask : uint32_t -{ - OffsetFirstElement = 0x01000000, // For example, OffsetFirstElement in @ref TensorComponent - Stride = 0x02000000, // For example, stride0 in @ref TensorComponent - Dimension = 0x04000000, // For example, Dim0 in @ref TensorComponent - FoldedDimensions = 0x08000000, // For example, Dim0xDim1 in @ref TensorComponent -}; - -/** Compute Kernel Writer tensor component. The tensor components are used to access specific backend-agnostic tensor arguments, - * such as the tensor dimensions and tensor strides. - * The data type is represented as an integer. The value of the integer value - * is assigned to retrieve the information through the @ref TensorComponentBitmask. 
- */ -enum class TensorComponentType : uint32_t -{ - Unknown = 0x00000000, - OffsetFirstElement = 0x01000000, - Stride0 = 0x02000001, - Stride1 = 0x02000010, - Stride2 = 0x02000100, - Stride3 = 0x02001000, - Stride4 = 0x02010000, - Dim0 = 0x04000001, - Dim1 = 0x04000010, - Dim2 = 0x04000100, - Dim3 = 0x04001000, - Dim4 = 0x04010000, - Dim1xDim2 = 0x08000110, - Dim2xDim3 = 0x08001100, - Dim1xDim2xDim3 = 0x08001110 -}; - -/** Compute Kernel Writer tensor storage. The tensor storage represents the type of tensor memory object. - */ -enum class TensorStorageType : uint32_t -{ - Unknown = 0x00000000, - BufferUint8Ptr = 0x01000000, - Texture2dReadOnly = 0x02000001, - Texture2dWriteOnly = 0x02000010, -}; - -/** Compute Kernel Writer tensor shape - * Negative dimensions can be interpreted as dynamic dimensions by the Compute Kernel Writer - */ -using TensorShape = std::array<int32_t, 5>; - -/** Compute Kernel Writer tensor info */ -class TensorInfo -{ -public: - /** Constructor - * - * @param[in] dt Tensor data type - * @param[in] shape Tensor shape - * @param[in] dl Tensor data layout - * @param[in] id Tensor id. The id is used to keep track of the bound user tensor. Through the id, - * the user can know what tensor has been used by the Compute Kernel Writer. 
- * Possible id values: - * - greater than or equal to 0: bind a user specific tensors - * - less than 0: bind a virtual tensor (tile) - */ - TensorInfo(DataType dt, const TensorShape &shape, TensorDataLayout dl, int32_t id); - - /** Set shape */ - TensorInfo &shape(const TensorShape &shape); - - /** Get shape */ - TensorShape shape() const; - - /** Set data type */ - TensorInfo &data_type(DataType dt); - - /** Get data type */ - DataType data_type() const; - - /** Set data layout */ - TensorInfo &data_layout(TensorDataLayout dl); - - /** Get data layout */ - TensorDataLayout data_layout() const; - - /** Set id */ - TensorInfo &id(int32_t id); - - /** Get layout */ - int32_t id() const; - -private: - TensorShape _shape{{0}}; - DataType _dt{DataType::Unknown}; - TensorDataLayout _dl{TensorDataLayout::Unknown}; - int32_t _id{-1}; -}; -} // namespace ckw - -#endif /* CKW_PROTOTYPE_INCLUDE_CKW_TENSORINFO_H */ diff --git a/compute_kernel_writer/prototype/include/ckw/TensorOperand.h b/compute_kernel_writer/prototype/include/ckw/TensorOperand.h deleted file mode 100644 index c221b449fa..0000000000 --- a/compute_kernel_writer/prototype/include/ckw/TensorOperand.h +++ /dev/null @@ -1,196 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef CKW_PROTOTYPE_INCLUDE_CKW_TENSOROPERAND_H -#define CKW_PROTOTYPE_INCLUDE_CKW_TENSOROPERAND_H - -#include "ckw/OperandBase.h" -#include "ckw/TensorInfo.h" -#include "ckw/TensorTileSampler.h" -#include "ckw/TileOperand.h" -#include "ckw/types/DataType.h" - -#include <memory> - -namespace ckw -{ - -class TensorComponentOperand; - -// ================================================================================================= -// TensorOperand -// ================================================================================================= - -/** Tensor operand */ -class TensorOperand : public OperandBase -{ -public: - /** Initialize a new instance of @ref TensorOperand class. - * - * @param[in] name The name of the tensor. - * @param[in] info The tensor info. - * @param[in] storage_type The tensor storage type. - */ - TensorOperand(const ::std::string &name, const TensorInfo &info, TensorStorageType storage_type); - - /** No copy constructor. */ - TensorOperand(const TensorOperand &other) = delete; - - /** No copy assignment. */ - TensorOperand &operator=(const TensorOperand &other) = delete; - - /** (Internal use only) Create the implementation operand. - * - * @param[in] writer The implementation kernel writer. - */ - virtual prototype::Operand create_impl_operand(prototype::IGpuKernelWriter *writer) const override; - - /** Get the tensor info. */ - const TensorInfo &info() const; - - /** Get the tensor info. 
*/ - TensorInfo &info(); - - /** Get the tensor storage type. */ - TensorStorageType storage_type() const; - - /** Get the data type. */ - virtual DataType data_type() const override; - - /** Get whether the tensor is compile-time constant. */ - virtual bool is_constant() const override; - - /** Get the default tile attached to the tensor. */ - const TileOperand &tile() const; - - /** Get the default tile attached to the tensor. */ - TileOperand &tile(); - - /** Set the default tile attached to the tensor. */ - TensorOperand &tile(TileOperand &tile); - - /** Get the tensor sampler of the default tile. */ - const TensorTileSampler &tile_sampler() const; - - /** Get the tensor sampler of the default tile. */ - TensorTileSampler &tile_sampler(); - - /** Set the tensor sampler of the default tile. */ - TensorOperand &tile_sampler(const TensorTileSampler &value); - - /** Get the operand that contains the stride in y dimension of the tensor. */ - TensorComponentOperand &stride1(); - - /** Get the operand that contains the stride in z dimension of the tensor. */ - TensorComponentOperand &stride2(); - - /** Get the operand that contains the stride in w dimension of the tensor. */ - TensorComponentOperand &stride3(); - - /** Get the operand that contains the stride in w dimension of the tensor. */ - TensorComponentOperand &stride4(); - - /** Get the operand that contains the size of dimension 0 of the tensor. */ - TensorComponentOperand &dim0(); - - /** Get the operand that contains the size of dimension 1 of the tensor. */ - TensorComponentOperand &dim1(); - - /** Get the operand that contains the size of dimension 2 of the tensor. */ - TensorComponentOperand &dim2(); - - /** Get the operand that contains the size of dimension 3 of the tensor. */ - TensorComponentOperand &dim3(); - - /** Get the operand that contains the size of dimension 4 of the tensor. */ - TensorComponentOperand &dim4(); - - /** Get the operand that contains the size of dimensions 1 and 2 collapsed. 
*/ - TensorComponentOperand &dim1_dim2(); - - /** Get the operand that contains the size of dimensions 1, 2 and 3 collapsed. */ - TensorComponentOperand &dim1_dim2_dim3(); - - /** Get the operand that contains the offset in bytes to the first element. */ - TensorComponentOperand &offset_first_element_in_bytes(); - -private: - TensorInfo _info; - TensorStorageType _storage_type; - - TileOperand *_tile{nullptr}; - TensorTileSampler _tile_sampler{}; - - ::std::unique_ptr<TensorComponentOperand> _stride1{nullptr}; - ::std::unique_ptr<TensorComponentOperand> _stride2{nullptr}; - ::std::unique_ptr<TensorComponentOperand> _stride3{nullptr}; - ::std::unique_ptr<TensorComponentOperand> _stride4{nullptr}; - ::std::unique_ptr<TensorComponentOperand> _dim0{nullptr}; - ::std::unique_ptr<TensorComponentOperand> _dim1{nullptr}; - ::std::unique_ptr<TensorComponentOperand> _dim2{nullptr}; - ::std::unique_ptr<TensorComponentOperand> _dim3{nullptr}; - ::std::unique_ptr<TensorComponentOperand> _dim4{nullptr}; - ::std::unique_ptr<TensorComponentOperand> _dim1_dim2{nullptr}; - ::std::unique_ptr<TensorComponentOperand> _dim1_dim2_dim3{nullptr}; - ::std::unique_ptr<TensorComponentOperand> _offset_first_element_in_bytes{nullptr}; -}; - -// ================================================================================================= -// TensorComponentOperand -// ================================================================================================= - -/** Tile operand that contains tensor information. */ -class TensorComponentOperand : public TileOperand -{ -public: - /** Initialize a new instance of @ref TensorComponentOperand class. - * - * @param[in] tensor The tensor operand. - * @param[in] component The tensor info component. - */ - TensorComponentOperand(TensorOperand &tensor, TensorComponentType component); - - /** Get the tensor operand. */ - TensorOperand &tensor(); - - /** Get the tensor operand. 
*/ - const TensorOperand &tensor() const; - - /** Get the tensor component. */ - TensorComponentType component_type() const; - - /** (Internal use only) Create the implementation operand. - * - * @param[in] writer The implementation kernel writer. - */ - virtual prototype::Operand create_impl_operand(prototype::IGpuKernelWriter *writer) const override; - -private: - TensorOperand &_tensor; - TensorComponentType _component; -}; - -} // namespace ckw - -#endif // CKW_PROTOTYPE_INCLUDE_CKW_TENSOROPERAND_H diff --git a/compute_kernel_writer/prototype/include/ckw/TensorTileSampler.h b/compute_kernel_writer/prototype/include/ckw/TensorTileSampler.h deleted file mode 100644 index 606dec3535..0000000000 --- a/compute_kernel_writer/prototype/include/ckw/TensorTileSampler.h +++ /dev/null @@ -1,169 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef CKW_PROTOTYPE_INCLUDE_CKW_TENSORTILESAMPLER_H -#define CKW_PROTOTYPE_INCLUDE_CKW_TENSORTILESAMPLER_H - -#include "ckw/types/TensorSamplerTypes.h" - -#include <functional> - -namespace ckw -{ - -class TileOperand; - -/** Tensor sampler - * - * It contains information about how the result tile should be stored to tensor memory. - * It can also be used to dictate how the subsequent operators fetch the input tensor. - */ -class TensorTileSampler -{ -public: - /** Initialize a new instance of @ref TensorSampler class. */ - TensorTileSampler(); - - /** Initialize a new instance of @ref TensorSampler class. - * - * @param[in] x The coordinate in the x dimension. - * @param[in] y The coordinate in the y dimension. - * @param[in] z The coordinate in the z dimension. - * @param[in] b The coordinate in the batch dimension. - * @param[in] format The tensor data format. - * @param[in] address_mode_x The address mode of the x dimension. - * @param[in] address_mode_y The address mode of the y dimension. - * @param[in] address_mode_z The address mode of the z dimension. - */ - TensorTileSampler(TileOperand &x, - TileOperand &y, - TileOperand &z, - TileOperand &b, - TensorSamplerFormat format, - TensorSamplerAddressModeX address_mode_x, - TensorSamplerAddressModeY address_mode_y, - TensorSamplerAddressModeZ address_mode_z); - - /** Initialize a new instance of @ref TensorSampler class. - * - * @param[in] x The coordinate in the x dimension. - * @param[in] y The coordinate in the y dimension. - * @param[in] z The coordinate in the z dimension. - * @param[in] b The coordinate in the batch dimension. - * @param[in] height The height of the tile. - * @param[in] width The width of the tile. - * @param[in] format The tensor data format. - * @param[in] address_mode_x The address mode of the x dimension. - * @param[in] address_mode_y The address mode of the y dimension. - * @param[in] address_mode_z The address mode of the z dimension. 
- */ - TensorTileSampler(TileOperand &x, - TileOperand &y, - TileOperand &z, - TileOperand &b, - int32_t height, - int32_t width, - TensorSamplerFormat format, - TensorSamplerAddressModeX address_mode_x, - TensorSamplerAddressModeY address_mode_y, - TensorSamplerAddressModeZ address_mode_z); - - /** Get the coordinate in the x dimension. */ - const TileOperand &x() const; - - /** Set the coordinate in the x dimension. */ - TensorTileSampler &x(TileOperand &x); - - /** Get the coordinate in the y dimension. */ - const TileOperand &y() const; - - /** Set the coordinate in the y dimension. */ - TensorTileSampler &y(TileOperand &y); - - /** Get the coordinate in the z dimension. */ - const TileOperand &z() const; - - /** Set the coordinate in the z dimension. */ - TensorTileSampler &z(TileOperand &z); - - /** Get the coordinate in the batch dimension. */ - const TileOperand &b() const; - - /** Set the coordinate in the batch dimension. */ - TensorTileSampler &b(TileOperand &b); - - /** Get the width of the tile. */ - int32_t width() const; - - /** Set the width of the tile. */ - TensorTileSampler &width(int32_t width); - - /** Get the height of the tile. */ - int32_t height() const; - - /** Set the height of the tile. */ - TensorTileSampler &height(int32_t height); - - /** Get the format of the tensor. */ - TensorSamplerFormat format() const; - - /** Set the format of the tensor. */ - TensorTileSampler &format(TensorSamplerFormat format); - - /** Get the address mode of the x dimension. */ - TensorSamplerAddressModeX address_mode_x() const; - - /** Set the address mode of the x-dimension. */ - TensorTileSampler &address_mode_x(TensorSamplerAddressModeX address_mode_x); - - /** Get the address mode of the y dimension. */ - TensorSamplerAddressModeY address_mode_y() const; - - /** Set the address mode of the y dimension. */ - TensorTileSampler &address_mode_y(TensorSamplerAddressModeY address_mode_y); - - /** Get the address mode of the z dimension. 
*/ - TensorSamplerAddressModeZ address_mode_z() const; - - /** Set the address mode of the z dimension. */ - TensorTileSampler &address_mode_z(TensorSamplerAddressModeZ address_mode_z); - -private: - TileOperand *_x{nullptr}; - TileOperand *_y{nullptr}; - TileOperand *_z{nullptr}; - TileOperand *_b{nullptr}; - - int32_t _height{0}; - int32_t _width{0}; - - TensorSamplerFormat _format{TensorSamplerFormat::Unknown}; - TensorSamplerAddressModeX _address_mode_x{TensorSamplerAddressModeX::Unknown}; - TensorSamplerAddressModeY _address_mode_y{TensorSamplerAddressModeY::Unknown}; - TensorSamplerAddressModeZ _address_mode_z{TensorSamplerAddressModeZ::Unknown}; -}; - -} // namespace ckw - -#endif // CKW_PROTOTYPE_INCLUDE_CKW_TENSORTILESAMPLER_H diff --git a/compute_kernel_writer/prototype/include/ckw/TileInfo.h b/compute_kernel_writer/prototype/include/ckw/TileInfo.h deleted file mode 100644 index e0d064169e..0000000000 --- a/compute_kernel_writer/prototype/include/ckw/TileInfo.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef CKW_PROTOTYPE_INCLUDE_CKW_TILEINFO_H -#define CKW_PROTOTYPE_INCLUDE_CKW_TILEINFO_H - -#include "ckw/types/DataType.h" - -#include <array> -#include <cstdint> - -namespace ckw -{ -// Constants to access the tile width and height in the TileShape -constexpr int32_t kTileWidthIdx = 0; -constexpr int32_t kTileHeightIdx = 1; - -/** Compute Kernel Writer tile shape. It is used to define the shape of the tile */ -using TileShape = std::array<int32_t, 2>; - -/** Compute Kernel Writer tile info */ -class TileInfo -{ -public: - /** Constructor used to initialize a scalar variable with a given data type - * - * @param[in] dt Tile data type - */ - TileInfo(DataType dt); - - /** Constructor used to initialize a vector with a given data type and vector length. - * - * @param[in] dt Tile data type - * @param[in] w Tile width (or vector length) - */ - TileInfo(DataType dt, int32_t w); - - /** Constructor used to initialize a tile with a given data type and tile sizes. 
- * - * @param[in] dt Tile data type - * @param[in] h Tile height - * @param[in] w Tile width - */ - TileInfo(DataType dt, int32_t h, int32_t w); - - /** Set width */ - TileInfo &width(int32_t w); - - /** Get width */ - int32_t width() const; - - /** Set height */ - TileInfo &height(int32_t h); - - /** Get height */ - int32_t height() const; - - /** Set data type */ - TileInfo &data_type(DataType dt); - - /** Get data type */ - DataType data_type() const; - -private: - DataType _dt{DataType::Unknown}; - TileShape _shape{}; -}; - -} // namespace ckw - -#endif /* COMPUTE_KERNEL_WRITER_INCLUDE_CKW_TILEINFO_H */ diff --git a/compute_kernel_writer/prototype/include/ckw/TileOperand.h b/compute_kernel_writer/prototype/include/ckw/TileOperand.h deleted file mode 100644 index 24ee373a24..0000000000 --- a/compute_kernel_writer/prototype/include/ckw/TileOperand.h +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef CKW_PROTOTYPE_INCLUDE_CKW_TILEOPERAND_H -#define CKW_PROTOTYPE_INCLUDE_CKW_TILEOPERAND_H - -#include "ckw/Error.h" -#include "ckw/OperandBase.h" -#include "ckw/ScalarValue.h" -#include "ckw/TileInfo.h" - -#include <vector> - -namespace ckw -{ - -class Kernel; - -using TileContainer = std::vector<std::vector<std::string>>; - -/** Tile operand which can be either scalar, vector or 2D tile. */ -class TileOperand : public OperandBase -{ -public: - /** Initialize a new instance of @ref TileOperand class with the tile information. - * - * @param[in] name The name of the tile. - * @param[in] tile_info The tile info. - */ - TileOperand(const ::std::string &name, const TileInfo &tile_info); - - /** Initialize a new instance of @ref TileOperand for scalar variable. - * - * @param[in] name The name of the tile. - * @param[in] data_type The data type of the tile. - */ - TileOperand(const ::std::string &name, DataType data_type); - - /** Initialize a new instance of @ref TileOperand for compile-time constant scalar variable. - * - * @param[in] name The name of the tile. - * @param[in] value The value of the tile. - */ - TileOperand(const ::std::string &name, int32_t value); - - /** Initialize a new instance of @ref TileOperand for compile-time constant scalar variable. - * - * @param[in] name The name of the tile. - * @param[in] value The value of the tile. - */ - TileOperand(const ::std::string &name, float value); - - /** Initialize a new instance of @ref TileOperand for compile-time constant variable. - * - * @param[in] name The name of the tile. - * @param[in] value The value of the tile. 
- */ - TileOperand(const ::std::string &name, const ::std::vector<std::vector<std::string>> &value, DataType dt); - - /** Prohibit copy of tile operand. */ - TileOperand(const TileOperand &) = delete; - - /** Prohibit copy of tile operand. */ - TileOperand &operator=(const TileOperand &) = delete; - - /** (Internal use only) Create the implementation operand. - * - * @param[in] writer The implementation kernel writer. - */ - virtual prototype::Operand create_impl_operand(prototype::IGpuKernelWriter *writer) const override; - - /** Get the tile info. */ - const TileInfo &tile_info() const; - - /** Get the data type of the tile. */ - virtual DataType data_type() const override; - - /** Get whether the tile is compile-time constant. */ - virtual bool is_constant() const override; - - /** Get whether the tile is a scalar value. */ - bool is_scalar() const; - - /** Get the scalar value of the tile. - * - * The tile must have the shape of 1, 1 (i.e. scalar). - * - * @return Scalar value as a string. - */ - std::string scalar_value() const; - - /** Get the values of the tile. - * - * @return 2D container of values. - */ - const TileContainer &value() const; - -private: - TileInfo _info; - TileContainer _value{}; - bool _constant; -}; - -} // namespace ckw - -#endif // CKW_PROTOTYPE_INCLUDE_CKW_TILEOPERAND_H diff --git a/compute_kernel_writer/prototype/include/ckw/types/ConvertPolicy.h b/compute_kernel_writer/prototype/include/ckw/types/ConvertPolicy.h deleted file mode 100644 index 2a198507eb..0000000000 --- a/compute_kernel_writer/prototype/include/ckw/types/ConvertPolicy.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef CKW_INCLUDE_CKW_CONVERTPOLICY_H -#define CKW_INCLUDE_CKW_CONVERTPOLICY_H - -#include <cstdint> - -namespace ckw -{ - -enum class ConvertPolicy : int32_t -{ - None = 0, // No policy specified. - Saturate = 1, // Saturated. -}; - -} // namespace ckw - -#endif //CKW_INCLUDE_CKW_CONVERTPOLICY_H diff --git a/compute_kernel_writer/prototype/include/ckw/types/DataType.h b/compute_kernel_writer/prototype/include/ckw/types/DataType.h deleted file mode 100644 index 3447dd61d6..0000000000 --- a/compute_kernel_writer/prototype/include/ckw/types/DataType.h +++ /dev/null @@ -1,50 +0,0 @@ -/* -* Copyright (c) 2023 Arm Limited. 
-* -* SPDX-License-Identifier: MIT -* -* Permission is hereby granted, free of charge, to any person obtaining a copy -* of this software and associated documentation files (the "Software"), to -* deal in the Software without restriction, including without limitation the -* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -* sell copies of the Software, and to permit persons to whom the Software is -* furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in all -* copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. -*/ - -#ifndef CKW_INCLUDE_CKW_DATATYPE_H -#define CKW_INCLUDE_CKW_DATATYPE_H - -#include <cstdint> - -namespace ckw -{ - -/** Compute Kernel Writer data types. This data type is used by the code variables and tensor arguments. */ -enum class DataType : int32_t -{ - Unknown = 0x00, - Fp32 = 0x11, - Fp16 = 0x12, - Int32 = 0x21, - Int16 = 0x22, - Int8 = 0x24, - Uint32 = 0x31, - Uint16 = 0x32, - Uint8 = 0x34, - Bool = 0x41 -}; - -} // namespace ckw - -#endif //CKW_INCLUDE_CKW_DATATYPE_H diff --git a/compute_kernel_writer/prototype/include/ckw/types/Functions.h b/compute_kernel_writer/prototype/include/ckw/types/Functions.h deleted file mode 100644 index c6afaa0ac8..0000000000 --- a/compute_kernel_writer/prototype/include/ckw/types/Functions.h +++ /dev/null @@ -1,62 +0,0 @@ -/* -* Copyright (c) 2023 Arm Limited. 
-* -* SPDX-License-Identifier: MIT -* -* Permission is hereby granted, free of charge, to any person obtaining a copy -* of this software and associated documentation files (the "Software"), to -* deal in the Software without restriction, including without limitation the -* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -* sell copies of the Software, and to permit persons to whom the Software is -* furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in all -* copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. 
-*/ - -#ifndef CKW_INCLUDE_CKW_FUNCTIONS_H -#define CKW_INCLUDE_CKW_FUNCTIONS_H - -#include <cstdint> - -namespace ckw -{ - -enum class UnaryFunction : int32_t -{ - Exp = 0x0000, - Tanh = 0x0001, - Sqrt = 0x0002, - Erf = 0x0003, - Fabs = 0x0004, - Log = 0x0006, - Round = 0x0007, - Floor = 0x0008, - - // Misc - SizeOf = 0x0009, -}; - -enum class BinaryFunction : int32_t -{ - Min = 0x0000, - Max = 0x0001, -}; - -enum class TernaryFunction : int32_t -{ - Select = 0x0000, - Clamp = 0x0001, -}; - -} // namespace ckw - -#endif //CKW_INCLUDE_CKW_FUNCTIONS_H diff --git a/compute_kernel_writer/prototype/include/ckw/types/GpuTargetLanguage.h b/compute_kernel_writer/prototype/include/ckw/types/GpuTargetLanguage.h deleted file mode 100644 index 6c08617949..0000000000 --- a/compute_kernel_writer/prototype/include/ckw/types/GpuTargetLanguage.h +++ /dev/null @@ -1,41 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef CKW_INCLUDE_CKW_GPUTARGETLANGUAGE_H -#define CKW_INCLUDE_CKW_GPUTARGETLANGUAGE_H - -#include <cstdint> - -namespace ckw -{ - -enum class GpuTargetLanguage : int32_t -{ - Unknown, - OpenCL -}; - -} // namespace ckw - -#endif //CKW_INCLUDE_CKW_GPUTARGETLANGUAGE_H diff --git a/compute_kernel_writer/prototype/include/ckw/types/Operators.h b/compute_kernel_writer/prototype/include/ckw/types/Operators.h deleted file mode 100644 index b560996837..0000000000 --- a/compute_kernel_writer/prototype/include/ckw/types/Operators.h +++ /dev/null @@ -1,78 +0,0 @@ -/* -* Copyright (c) 2023 Arm Limited. -* -* SPDX-License-Identifier: MIT -* -* Permission is hereby granted, free of charge, to any person obtaining a copy -* of this software and associated documentation files (the "Software"), to -* deal in the Software without restriction, including without limitation the -* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or -* sell copies of the Software, and to permit persons to whom the Software is -* furnished to do so, subject to the following conditions: -* -* The above copyright notice and this permission notice shall be included in all -* copies or substantial portions of the Software. -* -* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR -* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, -* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE -* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER -* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, -* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE -* SOFTWARE. 
-*/ - -#ifndef CKW_INCLUDE_CKW_OPERATORS_H -#define CKW_INCLUDE_CKW_OPERATORS_H - -#include <cstdint> - -namespace ckw -{ - -enum class UnaryOp : int32_t -{ - LogicalNot = 0x0000, // ! - BitwiseNot = 0x0001, // ~ - Negate = 0x0002, // - -}; - -/* Binary operations -*/ -enum class BinaryOp : int32_t -{ - // Elementwise - Add = 0x0000, // + - Sub = 0x0001, // - - Mul = 0x0002, // * - Div = 0x0003, // / - Mod = 0x0004, // % - // Relational - Equal = 0x1000, // == - Less = 0x1001, // < - LessEqual = 0x1002, // <= - Greater = 0x1003, // > - GreaterEqual = 0x1004, // >= - // Algebra - MatMul_Nt_Nt = 0x2000, // X - MatMul_Nt_T = 0x2001, // X - MatMul_T_Nt = 0x2002, // X - MatMul_T_T = 0x2003, // X - Dot = 0x2004, // . - // Logical - LogicalAnd = 0x3000, // && - LogicalOr = 0x3001, // || - // Bitwise - BitwiseXOR = 0x4000, // ^ -}; - -enum class AssignmentOp : int32_t -{ - // Unary - Increment = 0x0000, // += - Decrement = 0x0001, // -= -}; - -} // namespace ckw - -#endif //CKW_INCLUDE_CKW_OPERATORS_H diff --git a/compute_kernel_writer/prototype/include/ckw/types/TensorSamplerTypes.h b/compute_kernel_writer/prototype/include/ckw/types/TensorSamplerTypes.h deleted file mode 100644 index 63405a0764..0000000000 --- a/compute_kernel_writer/prototype/include/ckw/types/TensorSamplerTypes.h +++ /dev/null @@ -1,82 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef CKW_INCLUDE_CKW_TENSORSAMPLERTYPES_H -#define CKW_INCLUDE_CKW_TENSORSAMPLERTYPES_H - -#include <cstdint> - -namespace ckw -{ - -enum class TensorSamplerFormat : int32_t -{ - Unknown = 0, - C_WH_1 = 1, - C_W_H = 2 -}; - -enum class TensorSamplerAddressModeX : int32_t -{ - Unknown = 0, - None = 1, // The user guarantees that the X coordinate is always in-bound - OverlappingMin = - 2 // (FIXED shapes only) Reduce the load/store length when x == 0 (MIN). The load length will be width % original length - // Leftover elements can be handled using overlapping. This involves processing some of the elements in the array twice. 
-}; - -enum class TensorSamplerAddressModeY : int32_t -{ - Unknown = 0, - None = 1, // The user guarantees that the Y coordinate is always in-bound - OverlappingMin = - 2, // (FIXED shapes only) Reduce the load/store length when x == 0 (MIN). The load length will be width % original length - Skip = 3, // Skip the read/write - SkipMinEdgeOnly = - 4, // Skip greater than or equal to max only. The user guarantees that the Y coordinate is always >= 0 - SkipMaxEdgeOnly = 5, // Skip less than 0 only - ClampToNearest = 6, // Clamp the coordinate to nearest edge (0 or max value allowed on Y) - ClampToMinEdgeOnly = 7, // Clamp the negative coordinate to 0 only. Therefore, we expect Y to be always < MAX - ClampToMaxEdgeOnly = 8, // Clamp the coordinate to the max value allowed on Y only. We expect Y to be always >= 0 - ClampToBorder = 9, // Clamp to border which always has 0 value - ClampToBorderMinEdgeOnly = 10, - ClampToBorderMaxEdgeOnly = 11 -}; - -enum class TensorSamplerAddressModeZ : int32_t -{ - Unknown = 0, - None = 1, // The user guarantees that the Y coordinate is always in-bound - Skip = 3, // Skip the read/write - SkipMinEdgeOnly = - 4, // Skip greater than or equal to max only. The user guarantees that the Y coordinate is always >= 0 - SkipMaxEdgeOnly = 5, // Skip less than 0 only - ClampToNearest = 6, // Clamp the coordinate to nearest edge (0 or max value allowed on Y) - ClampToMinEdgeOnly = 7, // Clamp the negative coordinate to 0 only. Therefore, we expect Y to be always < MAX - ClampToMaxEdgeOnly = 8, // Clamp the coordinate to the max value allowed on Y only. We expect Y to be always >= 0 -}; - -} // namespace ckw - -#endif //CKW_INCLUDE_CKW_TENSORSAMPLERTYPES_H diff --git a/compute_kernel_writer/prototype/src/Kernel.cpp b/compute_kernel_writer/prototype/src/Kernel.cpp deleted file mode 100644 index 6228ed17d0..0000000000 --- a/compute_kernel_writer/prototype/src/Kernel.cpp +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "ckw/Kernel.h" - -#include "ckw/TensorOperand.h" -#include "ckw/types/GpuTargetLanguage.h" - -#include "src/Prototype.h" - -namespace ckw -{ - -Kernel::Kernel(GpuTargetLanguage language) : Kernel{"unnamed", language} -{ -} - -Kernel::Kernel(const char *name, GpuTargetLanguage language) - : _name(name), - _kernel(std::make_unique<prototype::GpuKernelWriterDataHolder>(language)), - _operands{}, - _tensor_id_operands{} -{ -} - -Kernel::~Kernel() -{ -} - -const std::string &Kernel::name() const -{ - return _name; -} - -void Kernel::name(const std::string &name) -{ - _name = name; -} -std::vector<KernelArgument> Kernel::arguments() const -{ - std::vector<KernelArgument> arguments; - - const auto impl_args = _kernel->arguments.tensor_argument_declarations(); - - for (auto tensor_arg : impl_args) - { - auto tensor = _tensor_id_operands.at(tensor_arg->format().id); - arguments.push_back(*tensor); - - for (auto component_arg : tensor_arg->component_declarations()) - { - switch (component_arg) - { - case TensorComponentType::OffsetFirstElement: - arguments.push_back(tensor->offset_first_element_in_bytes()); - break; - - case TensorComponentType::Stride1: - arguments.push_back(tensor->stride1()); - break; - - case TensorComponentType::Stride2: - arguments.push_back(tensor->stride2()); - break; - - case TensorComponentType::Stride3: - arguments.push_back(tensor->stride3()); - break; - - case TensorComponentType::Stride4: - arguments.push_back(tensor->stride4()); - break; - - case TensorComponentType::Dim0: - arguments.push_back(tensor->dim0()); - break; - - case TensorComponentType::Dim1: - arguments.push_back(tensor->dim1()); - break; - - case TensorComponentType::Dim2: - arguments.push_back(tensor->dim2()); - break; - - case TensorComponentType::Dim3: - arguments.push_back(tensor->dim3()); - break; - - case TensorComponentType::Dim4: - arguments.push_back(tensor->dim4()); - break; - - case TensorComponentType::Dim1xDim2: - 
arguments.push_back(tensor->dim1_dim2()); - break; - - case TensorComponentType::Dim1xDim2xDim3: - arguments.push_back(tensor->dim1_dim2_dim3()); - break; - - default: - CKW_ASSERT(false); - } - } - } - - return arguments; -} - -TileOperand &Kernel::register_operand(std::unique_ptr<TileOperand> operand) -{ - const auto &name = operand->name(); - auto ptr = operand.get(); - - CKW_ASSERT(_operands.find(name) == _operands.end()); - _operands[name] = std::move(operand); - - return *ptr; -} - -TensorOperand &Kernel::register_operand(std::unique_ptr<TensorOperand> operand) -{ - const auto id = operand->info().id(); - const auto &name = operand->name(); - auto ptr = operand.get(); - - CKW_ASSERT(_tensor_id_operands.find(id) == _tensor_id_operands.end()); - CKW_ASSERT(_operands.find(name) == _operands.end()); - - _tensor_id_operands[id] = operand.get(); - _operands[name] = std::move(operand); - - return *ptr; -} - -prototype::GpuKernelWriterDataHolder *Kernel::impl() -{ - return _kernel.get(); -} - -} // namespace ckw diff --git a/compute_kernel_writer/prototype/src/KernelArgument.cpp b/compute_kernel_writer/prototype/src/KernelArgument.cpp deleted file mode 100644 index 24ace28eb3..0000000000 --- a/compute_kernel_writer/prototype/src/KernelArgument.cpp +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "ckw/KernelArgument.h" - -#include "ckw/Error.h" -#include "ckw/TensorOperand.h" - -namespace ckw -{ - -KernelArgument::KernelArgument(TensorOperand &tensor) : _type(Type::TensorStorage), _id(tensor.info().id()) -{ - _sub_id.tensor_storage_type = tensor.storage_type(); -} - -KernelArgument::KernelArgument(TensorComponentOperand &tensor_component) - : _type(Type::TensorComponent), _id(tensor_component.tensor().info().id()) -{ - _sub_id.tensor_component_type = tensor_component.component_type(); -} - -KernelArgument::Type KernelArgument::type() const -{ - return _type; -} - -int32_t KernelArgument::id() const -{ - return _id; -} - -TensorStorageType KernelArgument::tensor_storage_type() const -{ - CKW_ASSERT(_type == Type::TensorStorage); - return _sub_id.tensor_storage_type; -} - -TensorComponentType KernelArgument::tensor_component_type() const -{ - CKW_ASSERT(_type == Type::TensorComponent); - return _sub_id.tensor_component_type; -} - -} // namespace ckw diff --git a/compute_kernel_writer/prototype/src/KernelWriter.cpp b/compute_kernel_writer/prototype/src/KernelWriter.cpp deleted file mode 100644 index 9f58d9fefa..0000000000 --- a/compute_kernel_writer/prototype/src/KernelWriter.cpp +++ /dev/null @@ -1,371 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "ckw/KernelWriter.h" - -#include "ckw/Error.h" -#include "ckw/TensorInfo.h" -#include "ckw/TensorOperand.h" - -#include "src/Prototype.h" - -#include <sstream> - -namespace ckw -{ - -namespace -{ - -inline prototype::TensorInfo create_impl_tensor_info(const TensorInfo &info) -{ - return prototype::TensorInfo{info.shape(), info.data_type(), info.data_layout(), info.id()}; -} - -} // namespace - -// ================================================================================================= -// Constructors and destructor -// ================================================================================================= - -KernelWriter::KernelWriter(Kernel &kernel) - : _kernel(&kernel), - _impl_attr(std::make_unique<prototype::GpuKernelWriterAttribute>()), - _impl(prototype::GpuKernelWriterFactory::create(_impl_attr.get(), kernel.impl())) -{ - _impl->set_IdSpace(1); -} - -KernelWriter::~KernelWriter() -{ -} - -// ================================================================================================= -// Scope management -// ================================================================================================= - -int32_t KernelWriter::id_space() const -{ - return _id_space; -} - -KernelWriter &KernelWriter::id_space(int32_t id_space) -{ - CKW_ASSERT(id_space <= _max_id_space); - - _id_space = id_space; - return *this; -} - -int32_t KernelWriter::next_id_space() -{ - id_space(++_max_id_space); - return _id_space; -} - -// ================================================================================================= -// Tensor and tile declaration -// ================================================================================================= - -TensorOperand & -KernelWriter::declare_tensor_argument(const std::string &name, const TensorInfo &info, TensorStorageType storage_type) -{ - const auto var_name = generate_variable_name(name); - - _impl->declare_argument(var_name, create_impl_tensor_info(info)); - - auto 
&operand = _kernel->register_operand(std::make_unique<TensorOperand>(var_name, info, storage_type)); - - return operand; -} - -TileOperand &KernelWriter::declare_tile_argument(const std::string &name, int32_t value) -{ - const auto var_name = generate_variable_name(name); - - auto &operand = _kernel->register_operand(std::make_unique<TileOperand>(var_name, value)); - - return operand; -} - -std::string KernelWriter::generate_variable_name(const std::string &name) const -{ - std::stringstream var_name; - - var_name << "_" << _id_space << "_" << name; - - return var_name.str(); -} - -TileOperand &KernelWriter::declare_tile_operand(std::unique_ptr<TileOperand> operand_ptr) -{ - auto &operand = _kernel->register_operand(std::move(operand_ptr)); - const auto &name = operand.name(); - - if (!operand.is_constant()) - { - const auto &info = operand.tile_info(); - - _impl->declare_tile(name, prototype::TileInfo(info.data_type(), info.width(), info.height())); - } - else - { - _impl->declare_const_tile(name, operand.value(), operand.data_type()); - } - - return operand; -} - -// ================================================================================================= -// Load and store -// ================================================================================================= - -void KernelWriter::op_load(TileOperand &tile, - const TensorOperand &tensor, - const TensorTileSampler &sampler, - const TileOperand &dilation_y) -{ - prototype::TensorOperand impl_tensor( - tensor.name(), - prototype::GpuSampler{sampler.format(), prototype::to_gpu_tensor_storage(tensor.storage_type()), - sampler.address_mode_x(), sampler.address_mode_y(), sampler.address_mode_z()}); - - auto impl_x = sampler.x().create_impl_operand(_impl.get()); - auto impl_y = sampler.y().create_impl_operand(_impl.get()); - auto impl_z = sampler.z().create_impl_operand(_impl.get()); - auto impl_b = sampler.b().create_impl_operand(_impl.get()); - - auto impl_dilation_y = 
dilation_y.create_impl_operand(_impl.get()); - - auto impl_dst = tile.create_impl_operand(_impl.get()); - - _impl->op_load_immediate(impl_tensor, impl_dst, impl_x, impl_y, impl_z, impl_b, impl_dilation_y); -} - -void KernelWriter::op_load_indirect(TileOperand &tile, const TensorOperand &tensor, const TensorTileSampler &sampler) -{ - prototype::TensorOperand impl_tensor( - tensor.name(), - prototype::GpuSampler{sampler.format(), prototype::to_gpu_tensor_storage(tensor.storage_type()), - sampler.address_mode_x(), sampler.address_mode_y(), sampler.address_mode_z()}); - - auto impl_x = sampler.x().create_impl_operand(_impl.get()); - auto impl_y = sampler.y().create_impl_operand(_impl.get()); - auto impl_z = sampler.z().create_impl_operand(_impl.get()); - auto impl_b = sampler.b().create_impl_operand(_impl.get()); - - auto impl_dst = tile.create_impl_operand(_impl.get()); - - _impl->op_load_indirect(impl_tensor, impl_dst, impl_x, impl_y, impl_z, impl_b); -} - -void KernelWriter::util_get_indirect_buffer(TileOperand &tile, - const TensorOperand &tensor, - const TensorTileSampler &sampler, - const TileOperand &x, - const TileOperand &y, - const TileOperand &x_off, - const TileOperand &y_off) -{ - prototype::TensorOperand impl_tensor( - tensor.name(), - prototype::GpuSampler{sampler.format(), prototype::to_gpu_tensor_storage(tensor.storage_type()), - sampler.address_mode_x(), sampler.address_mode_y(), sampler.address_mode_z()}); - - auto impl_x = x.create_impl_operand(_impl.get()); - auto impl_y = y.create_impl_operand(_impl.get()); - auto impl_x_off = x_off.create_impl_operand(_impl.get()); - auto impl_y_off = y_off.create_impl_operand(_impl.get()); - - auto impl_dst = tile.create_impl_operand(_impl.get()); - - _impl->util_get_indirect_buffer(impl_dst, impl_tensor, impl_x, impl_y, impl_x_off, impl_y_off); -} - -void KernelWriter::op_store(TensorOperand &tensor, const TileOperand &tile, const TensorTileSampler &sampler) -{ - prototype::TensorOperand impl_tensor( - 
tensor.name(), - prototype::GpuSampler{sampler.format(), prototype::to_gpu_tensor_storage(tensor.storage_type()), - sampler.address_mode_x(), sampler.address_mode_y(), sampler.address_mode_z()}); - auto impl_src = tile.create_impl_operand(_impl.get()); - auto impl_x = sampler.x().create_impl_operand(_impl.get()); - auto impl_y = sampler.y().create_impl_operand(_impl.get()); - auto impl_z = sampler.z().create_impl_operand(_impl.get()); - auto impl_b = sampler.b().create_impl_operand(_impl.get()); - - _impl->op_store_immediate(impl_tensor, impl_src, impl_x, impl_y, impl_z, impl_b); -} - -// ================================================================================================= -// Data processing -// ================================================================================================= - -void KernelWriter::op_assign(const TileOperand &dst, const TileOperand &src) -{ - auto impl_dst = dst.create_impl_operand(_impl.get()); - auto impl_src = src.create_impl_operand(_impl.get()); - - _impl->op_assign(impl_dst, impl_src); -} - -void KernelWriter::op_cast_expression(const TileOperand &dst, const TileOperand &src, const ConvertPolicy policy) -{ - auto impl_dst = dst.create_impl_operand(_impl.get()); - auto impl_src = src.create_impl_operand(_impl.get()); - - _impl->op_cast_expression(impl_dst, impl_src, policy); -} - -void KernelWriter::op_binary_expression(const TileOperand &dst, - const TileOperand &lhs, - BinaryOp op, - const TileOperand &rhs) -{ - auto impl_lhs = lhs.create_impl_operand(_impl.get()); - auto impl_rhs = rhs.create_impl_operand(_impl.get()); - auto impl_dst = dst.create_impl_operand(_impl.get()); - - _impl->op_binary_expression(impl_dst, impl_lhs, op, impl_rhs); -} - -void KernelWriter::op_unary_expression(const TileOperand &dst, UnaryOp op, const TileOperand &src) -{ - auto impl_dst = dst.create_impl_operand(_impl.get()); - auto impl_src = src.create_impl_operand(_impl.get()); - - _impl->op_unary_expression(impl_dst, op, impl_src); 
-} - -void KernelWriter::op_unary_elementwise_function(const TileOperand &dst, UnaryFunction opcode, const TileOperand &src) -{ - auto impl_dst = dst.create_impl_operand(_impl.get()); - auto impl_src = src.create_impl_operand(_impl.get()); - - _impl->op_unary_elementwise_function(impl_dst, opcode, impl_src); -} - -void KernelWriter::op_binary_elementwise_function(const TileOperand &dst, - BinaryFunction opcode, - const TileOperand &first, - const TileOperand &second) -{ - auto impl_dst = dst.create_impl_operand(_impl.get()); - auto impl_first = first.create_impl_operand(_impl.get()); - auto impl_second = second.create_impl_operand(_impl.get()); - - _impl->op_binary_elementwise_function(impl_dst, opcode, impl_first, impl_second); -} - -void KernelWriter::op_ternary_elementwise_function(const TileOperand &dst, - TernaryFunction opcode, - const TileOperand &first, - const TileOperand &second, - const TileOperand &third) -{ - auto impl_dst = dst.create_impl_operand(_impl.get()); - auto impl_first = first.create_impl_operand(_impl.get()); - auto impl_second = second.create_impl_operand(_impl.get()); - auto impl_third = third.create_impl_operand(_impl.get()); - - _impl->op_ternary_elementwise_function(impl_dst, opcode, impl_first, impl_second, impl_third); -} - -void KernelWriter::op_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body) -{ - auto impl_lhs = lhs.create_impl_operand(_impl.get()); - auto impl_rhs = rhs.create_impl_operand(_impl.get()); - - _impl->op_if_header(impl_lhs, op, impl_rhs); - _impl->compound_statement_begin(); - body(); - _impl->compound_statement_end(); -} - -void KernelWriter::op_else_if(const TileOperand &lhs, - BinaryOp op, - const TileOperand &rhs, - const std::function<void()> &body) -{ - auto impl_lhs = lhs.create_impl_operand(_impl.get()); - auto impl_rhs = rhs.create_impl_operand(_impl.get()); - - _impl->op_else_if_header(impl_lhs, op, impl_rhs); - _impl->compound_statement_begin(); - body(); - 
_impl->compound_statement_end(); -} - -void KernelWriter::op_else(const std::function<void()> &body) -{ - _impl->op_else_header(); - _impl->compound_statement_begin(); - body(); - _impl->compound_statement_end(); -} - -void KernelWriter::op_for_loop(const TileOperand &var_name, - BinaryOp cond_op, - const TileOperand &cond_value_name, - const TileOperand &update_var_name, - AssignmentOp update_op, - const TileOperand &update_value_name, - const std::function<void()> &body) -{ - auto impl_var_name = var_name.create_impl_operand(_impl.get()); - auto impl_cond_value_name = cond_value_name.create_impl_operand(_impl.get()); - auto impl_update_var_name = update_var_name.create_impl_operand(_impl.get()); - auto impl_update_value_name = update_value_name.create_impl_operand(_impl.get()); - - _impl->op_for_loop_header(impl_var_name, cond_op, impl_cond_value_name, impl_update_var_name, update_op, - impl_update_value_name); - _impl->compound_statement_begin(); - body(); - _impl->compound_statement_end(); -} - -// ================================================================================================= -// Misc -// ================================================================================================= - -void KernelWriter::op_get_global_id(const TileOperand &dst, int32_t dim) -{ - _impl->op_get_global_id(prototype::Operand(dst.name()), dim); -} - -void KernelWriter::op_return() -{ - _impl->op_return(); -} - -// ================================================================================================= -// Code generation -// ================================================================================================= - -std::string KernelWriter::generate_code() -{ - return prototype::generate_code(*_kernel->impl(), _kernel->name()); -} - -} // namespace ckw diff --git a/compute_kernel_writer/prototype/src/OperandBase.cpp b/compute_kernel_writer/prototype/src/OperandBase.cpp deleted file mode 100644 index e0617fdc06..0000000000 --- 
a/compute_kernel_writer/prototype/src/OperandBase.cpp +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "ckw/OperandBase.h" - -namespace ckw -{ - -OperandBase::OperandBase(const std::string &name) : _name(name) -{ -} - -OperandBase::~OperandBase() -{ -} - -const std::string &OperandBase::name() const -{ - return _name; -} - -OperandBase &OperandBase::name(const std::string &name) -{ - _name = name; - return *this; -} - -} // namespace ckw diff --git a/compute_kernel_writer/prototype/src/Prototype.h b/compute_kernel_writer/prototype/src/Prototype.h deleted file mode 100644 index b392fe2651..0000000000 --- a/compute_kernel_writer/prototype/src/Prototype.h +++ /dev/null @@ -1,4189 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef CKW_PROTOTYPE_SRC_PROTOTYPE_H -#define CKW_PROTOTYPE_SRC_PROTOTYPE_H - -#include "ckw/Error.h" -#include "ckw/TensorInfo.h" -#include "ckw/types/ConvertPolicy.h" -#include "ckw/types/DataType.h" -#include "ckw/types/Functions.h" -#include "ckw/types/GpuTargetLanguage.h" -#include "ckw/types/Operators.h" -#include "ckw/types/TensorSamplerTypes.h" - -#include <algorithm> -#include <array> -#include <cassert> // assert (to be removed) -#include <chrono> -#include <cmath> -#include <cstdint> // int32_t -#include <functional> -#include <iostream> // cout (to be removed) -#include <map> -#include <memory> -#include <stdexcept> -#include <string> -#include <unordered_map> -#include <vector> - -namespace ckw -{ -namespace prototype -{ - -// Dummy data structure for Size2D -using Size2D = std::vector<int32_t>; - -// Dummy Status -using Status = void; - -enum class ComponentType : int32_t -{ - Complex = 0, - Simple = 1, - Unfusable = 2 -}; - -enum class GpuCompilationSpeed -{ - Fast = 0x00, // fast compilation may increase the latency of the network - Slow = 0x01 // slow compilation may decrease the latency of the network -}; - -enum class GpuExtensions -{ - Fp16, - Dot8, - Mmul, - FastMath -}; - -struct TensorInfo -{ - TensorShape shape{{0}}; - DataType data_type{DataType::Unknown}; - TensorDataLayout data_layout{TensorDataLayout::Nhwc}; - int32_t id{-1}; -}; - -struct ComponentAttribute -{ - GpuCompilationSpeed compilation_speed{GpuCompilationSpeed::Fast}; - bool overwrite_tile{true}; -}; - -inline std::string data_type_to_cl_type(DataType dt) -{ - switch (dt) - { - case DataType::Fp32: - return "float"; - case DataType::Fp16: - return "half"; - case DataType::Int8: - return "char"; - case DataType::Uint8: - return "uchar"; - case DataType::Uint16: - return "ushort"; - case DataType::Int16: - return "short"; - case DataType::Uint32: - return "uint"; - case DataType::Int32: - return "int"; - case DataType::Bool: - return "bool"; - default: - assert(false); - 
return ""; - } -} - -inline int32_t width_to_cl_vector_size(int32_t width) -{ - switch (width) - { - case 1: - return 1; - case 2: - return 2; - case 3: - return 3; - case 4: - return 4; - case 5: - case 6: - case 7: - case 8: - return 8; - case 9: - case 10: - case 11: - case 12: - case 13: - case 14: - case 15: - case 16: - return 16; - default: - assert(false); - return 0; - } -} - -inline std::string get_cl_data_type(DataType dt, int32_t width) -{ - std::string data_type; - int32_t w = width_to_cl_vector_size(width); - data_type += data_type_to_cl_type(dt); - if (w != 1) - { - data_type += std::to_string(w); - } - return data_type; -} - -inline std::string to_opencl_store(int32_t vector_length) -{ - if (vector_length != 1) - { - return "vstore" + std::to_string(vector_length) + "("; - } - else - { - return "*("; - } -} - -struct TileInfo -{ - TileInfo() - { - } - - TileInfo(DataType dt) : dt(dt), w(1), h(1) - { - } - - TileInfo(DataType dt, int32_t width) : dt(dt), w(width), h(1) - { - } - - TileInfo(DataType dt, int32_t width, int32_t height) : dt(dt), w(width), h(height) - { - } - - DataType dt{DataType::Unknown}; // Data type of the tile - int32_t w{0}; // Width (i.e. c0 - portion of the channels) - int32_t h{0}; // Height (i.e. s0 - portion of the spatial dimensions) -}; - -inline std::ostream &operator<<(std::ostream &o, const TileInfo &a) -{ - o << a.w << " x " << a.h; - return o; -} - -struct DataTypeAsString -{ - std::string str{""}; - DataType dt{DataType::Unknown}; - int32_t size{1}; -}; - -struct ValueAsString -{ - std::string str{""}; - DataTypeAsString type{}; -}; - -// https://stackoverflow.com/questions/51515378/storing-and-accessing-tile-properties-in-c -// A Tile is a collection of variables used to express a 2D data. -class IScalarTile -{ -public: - virtual ~IScalarTile() = default; - - /** Method to get the scalar variable from a tile - * @param[in] x X coordinate on the width of the tile. 
If out-of-bound, the coordinate is clamped to the nearest valid edge - * @param[in] y Y coordinate on the height of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge - * - * @return the scalar variable as a string - */ - virtual ValueAsString scalar(int32_t x, int32_t y) const = 0; - - /** Method to get the list of underlying variable names used by the tile - * - * @return the list of variable names - */ - virtual std::vector<ValueAsString> underlying_source_variables() const = 0; - - /** Method to get the name of the tile. - * - * @return the name of the tile - */ - std::string name() const - { - return _basename; - } - - /** Method to get the tile format - * - * @return the format - */ - TileInfo format() const - { - return _format; - } - - /** Method to know whether the tile is assignable or not (constant) - * - * @return true if the tile is assignable - */ - virtual bool is_assignable() const = 0; - - /** Method to know whether the tile needs to be declared - * - * @return true if the tile needs to be declared in the code before being used - */ - virtual bool need_declaration() const = 0; - -protected: - TileInfo _format{}; // Tile format - std::string _basename{""}; // Tile name -}; - -// A tile is a collection of variables used to express a 2D data. The variables are vectors in the GPU context. -// The vector size is given by the width of the tile. The number of vectors height by depth defines the number of vectors -class IVectorTile : public IScalarTile -{ -public: - virtual ~IVectorTile() = default; - - /** Method to get the vector variable from a tile. A vector is an ordered homogeneous collection of two or more scalars. - * The user can query the list of supported width for the vectors through preferred_vector_sizes(). - * - * @param[in] y Y coordinate on the height of the tile. 
If out-of-bound, the coordinate is clamped to the nearest valid edge - * - * @return the vector variable as a string - */ - virtual ValueAsString vector(int32_t y) const = 0; - - /** Method to get a vector variable from a tile. A vector is an ordered homogeneous collection of two or more scalars. - * - * @return the vector variable as a string - */ - virtual ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const = 0; - /** Method to get the preferred vector sizes. - * - * @return a vector with the preferred vector sizes - */ - //virtual std::vector<int32_t> preferred_vector_sizes() const = 0; -}; - -class ClTile : public IVectorTile -{ -public: - ClTile(const std::string &name, TileInfo format) - { - _format = format; - _basename = name; - } - - ValueAsString scalar(int32_t x, int32_t y) const override - { - x = std::max(std::min(x, _format.w - 1), static_cast<int32_t>(0)); - y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0)); - - ValueAsString t; - t.str = build_variable_name(y); - t.type.str = get_cl_data_type(_format.dt, 1); - t.type.dt = _format.dt; - t.type.size = 1; - - // Check required because if the width has only one element, we cannot use .s0 - if (_format.w != 1) - { - // Automatic broadcasting - t.str += ".s" + std::to_string(x); - } - - return t; - } - - ValueAsString vector(int32_t y) const override - { - y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0)); - - ValueAsString t; - t.str = build_variable_name(y); - t.type.str = get_cl_data_type(_format.dt, _format.w); - t.type.dt = _format.dt; - t.type.size = _format.w; - return t; - } - - ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const override - { - y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0)); - - ValueAsString t; - t.str = build_variable_name(y); - t.type.str = get_cl_data_type(_format.dt, width); - t.type.dt = _format.dt; - t.type.size = width; - - if (_format.w != 1) - { - t.str += ".s"; - for (int i = 0; i < 
width; ++i) - { - t.str += to_scalar_hex(x_start + i); - } - } - return t; - } - - std::vector<ValueAsString> underlying_source_variables() const override - { - std::vector<ValueAsString> vars; - for (int32_t y = 0; y < _format.h; ++y) - { - ValueAsString t; - t.str = build_variable_name(y); - t.type.str = get_cl_data_type(_format.dt, _format.w); - t.type.dt = _format.dt; - t.type.size = _format.w; - vars.push_back(t); - } - return vars; - } - - bool is_assignable() const override - { - return true; - } - - bool need_declaration() const override - { - return true; - } - -private: - std::string build_variable_name(int32_t y) const - { - std::string var_name = _basename; - - if (_format.h == 1) - { - return var_name; - } - else - { - var_name += "_"; - var_name += std::to_string(y); - } - - return var_name; - } - - std::string to_scalar_hex(int32_t x) const - { - switch (x) - { - case 0: - case 1: - case 2: - case 3: - case 4: - case 5: - case 6: - case 7: - case 8: - case 9: - return std::to_string(x); - case 10: - return "A"; - case 11: - return "B"; - case 12: - return "C"; - case 13: - return "D"; - case 14: - return "E"; - case 15: - return "F"; - default: - std::cout << "Unsupported hexadecimal value" << std::endl; - assert(false); - return ""; - } - } -}; - -// Unique features: It contains values in the form of string. The name used for this object is misleading since the variables can change the value over time. 
-class ClConstantTile : public IVectorTile -{ -public: - ClConstantTile(const std::vector<std::vector<std::string>> &in, DataType dt) - { - _format.w = in[0].size(); - _format.h = in.size(); - _format.dt = dt; - - _data = std::vector<std::vector<std::string>>(_format.h, std::vector<std::string>(_format.w)); - - for (int32_t y = 0; y < _format.h; ++y) - { - for (int32_t x = 0; x < _format.w; ++x) - { - _data[y][x] = in[y][x]; - } - } - } - - ValueAsString scalar(int32_t x, int32_t y) const override - { - x = std::max(std::min(x, _format.w - 1), static_cast<int32_t>(0)); - y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0)); - - ValueAsString t; - t.str = _data[y][x]; - t.type.str = get_cl_data_type(_format.dt, 1); - t.type.dt = _format.dt; - t.type.size = 1; - - return t; - } - - ValueAsString vector(int32_t y) const override - { - y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0)); - - return vector(0, _format.w, y); - } - - ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const override - { - y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0)); - - ValueAsString t; - t.str = ""; - t.type.str = get_cl_data_type(_format.dt, width); - t.type.dt = _format.dt; - t.type.size = width; - - if (width > 1) - { - t.str += "((" + get_cl_data_type(_format.dt, width) + ")("; - } - - int32_t x = x_start; - for (; x < width - 1; ++x) - { - t.str += scalar(x, y).str; - t.str += ", "; - } - t.str += scalar(x, y).str; - - if (width > 1) - { - t.str += "))"; - } - - return t; - } - - std::vector<ValueAsString> underlying_source_variables() const override - { - std::vector<ValueAsString> vars; - - for (int32_t y = 0; y < _format.h; ++y) - { - for (int32_t x = 0; x < _format.w; ++x) - { - ValueAsString t; - t.str = _data[y][x]; - t.type.str = get_cl_data_type(_format.dt, 1); - t.type.dt = _format.dt; - t.type.size = 1; - vars.push_back(t); - } - } - - return vars; - } - - bool is_assignable() const override - { - return false; - 
} - - bool need_declaration() const override - { - return false; - } - -private: - std::vector<std::vector<std::string>> _data{}; -}; - -enum class TensorComponentIndex : int32_t -{ - IndexMask = 0x0000000f, -}; - -enum class TensorComponentGroup : int32_t -{ - OffsetFirstElement = 0x00000100, - Stride = 0x00001000, - Dimension = 0x00010000, - FoldedDimension = 0x00100000, - Constant = 0x01000000 -}; - -inline std::string to_string(TensorComponentType x) -{ - switch (x) - { - case TensorComponentType::Unknown: - return "Unknown"; - case TensorComponentType::OffsetFirstElement: - return "OffsetFirstElement"; - case TensorComponentType::Stride1: - return "Stride1"; - case TensorComponentType::Stride2: - return "Stride2"; - case TensorComponentType::Stride3: - return "Stride3"; - case TensorComponentType::Stride4: - return "Stride4"; - case TensorComponentType::Dim0: - return "Dim0"; - case TensorComponentType::Dim1: - return "Dim1"; - case TensorComponentType::Dim2: - return "Dim2"; - case TensorComponentType::Dim3: - return "Dim3"; - case TensorComponentType::Dim4: - return "Dim4"; - case TensorComponentType::Dim1xDim2: - return "Dim1xDim2"; - case TensorComponentType::Dim1xDim2xDim3: - return "Dim1xDim2xDim3"; - default: - assert(false); - return ""; - } -} - -class ITensorArgument -{ -public: - virtual ~ITensorArgument() = default; - - /** Method to get the tensor component as a string - * - * @param[in] x tensor component to query - * - * @return the tensor component as a string - */ - virtual std::string component(TensorComponentType x) = 0; - - /** Method to get the tensor component type declaration as a string - * - * @return the tensor component type declaration as a string - */ - virtual std::string component_type_declaration() const = 0; - - /** Method to get the tensor component data type - * - * @return the tensor component data type - */ - virtual DataType component_data_type() const = 0; - - /** Method to get the tensor component declarations - * - * 
@return a vector containing the tensor component declarations - */ - virtual std::vector<TensorComponentType> component_declarations() const = 0; - - /** Method to get the name of the tensor argument. - * - * @return the name of the tensor argument - */ - std::string name() const - { - return _basename; - } - - /** Method to get the tensor format - * - * @return the format - */ - TensorInfo format() const - { - return _format; - } - -protected: - TensorInfo _format{}; - std::string _basename{}; -}; - -enum class GpuTensorStorage : int32_t -{ - Unknown = 0x0000, - BufferUint8Ptr = 0x0012, - Image2dReadOnly = 0x0020, - Image2dWriteOnly = 0x0021, - Image3dReadOnly = 0x0030, - Image3dWriteOnly = 0x0031 -}; - -inline GpuTensorStorage to_gpu_tensor_storage(TensorStorageType s) -{ - switch (s) - { - case TensorStorageType::Unknown: - return GpuTensorStorage::Unknown; - - case TensorStorageType::BufferUint8Ptr: - return GpuTensorStorage::BufferUint8Ptr; - - case TensorStorageType::Texture2dReadOnly: - return GpuTensorStorage::Image2dReadOnly; - - case TensorStorageType::Texture2dWriteOnly: - return GpuTensorStorage::Image2dWriteOnly; - - default: - assert(false); - return GpuTensorStorage::Unknown; - } -} - -inline TensorStorageType to_tensor_storage(GpuTensorStorage s) -{ - switch (s) - { - case GpuTensorStorage::Unknown: - return TensorStorageType::Unknown; - - case GpuTensorStorage::BufferUint8Ptr: - return TensorStorageType::BufferUint8Ptr; - - case GpuTensorStorage::Image2dReadOnly: - return TensorStorageType::Texture2dReadOnly; - - case GpuTensorStorage::Image2dWriteOnly: - return TensorStorageType::Texture2dWriteOnly; - - default: - assert(false); - return TensorStorageType::Unknown; - } -} - -class IGpuTensorArgument : public ITensorArgument -{ -public: - virtual ~IGpuTensorArgument() = default; - - /** Method to get the tensor storage, which is the underlying storage used to keep the data memory - * - * @param[in] x tensor storage to query - * - * @return the 
tensor storage as a string - */ - virtual std::string storage(GpuTensorStorage x) = 0; - - /** Method to get the tensor storage type declaration as a string - * - * @param[in] x tensor component to query - * - * @return the tensor storage type declaration as a string - */ - virtual std::string storage_type_declaration(GpuTensorStorage x) const = 0; - - /** Method to get the tensor storage declarations - * - * @return a vector containing the tensor storage declarations - */ - virtual std::vector<GpuTensorStorage> storage_declarations() const = 0; -}; - -class ClTensorArgument : public IGpuTensorArgument -{ -public: - ClTensorArgument(const std::string &name, const TensorInfo &x, bool return_by_value_when_possible) - { - _basename = name; - _format = x; - _return_by_value_when_possible = return_by_value_when_possible; - } - - // Methods to override - std::string component(TensorComponentType x) override - { - if ((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentGroup::Constant))) - { - int32_t idx = static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentIndex::IndexMask); - return std::to_string(idx - 1); - } - - if (_return_by_value_when_possible) - { - if ((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentGroup::Dimension))) - { - int32_t idx = static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentIndex::IndexMask); - return std::to_string(_format.shape[idx]); - } - - if ((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentGroup::FoldedDimension))) - { - switch (x) - { - case TensorComponentType::Dim1xDim2: - return std::to_string(_format.shape[1] * _format.shape[2]); - case TensorComponentType::Dim1xDim2xDim3: - return std::to_string(_format.shape[1] * _format.shape[2] * _format.shape[2]); - default: - std::cout << "Unsupported folded dimension" << std::endl; - assert(false); - } - } - } - - if (std::find(_components_required.begin(), _components_required.end(), x) == _components_required.end()) - { - 
_components_required.push_back(x); - } - - return build_component_name(x); - } - - std::string component_type_declaration() const override - { - return "int"; - }; - - DataType component_data_type() const override - { - return DataType::Int32; - } - - std::string storage(GpuTensorStorage x) override - { - if (std::find(_storage_required.begin(), _storage_required.end(), x) == _storage_required.end()) - { - _storage_required.push_back(x); - } - - return build_storage_name(x); - } - - std::string storage_type_declaration(GpuTensorStorage x) const override - { - switch (x) - { - case GpuTensorStorage::BufferUint8Ptr: - return "__global uchar*"; - case GpuTensorStorage::Image2dReadOnly: - return "__read_only image2d_t"; - case GpuTensorStorage::Image2dWriteOnly: - return "__write_only image2d_t"; - case GpuTensorStorage::Image3dReadOnly: - return "__read_only image3d_t "; - case GpuTensorStorage::Image3dWriteOnly: - return "__write_only image3d_t "; - default: - std::cout << "Unsupported storage" << std::endl; - assert(false); - return ""; - } - }; - - std::vector<GpuTensorStorage> storage_declarations() const override - { - return _storage_required; - } - - std::vector<TensorComponentType> component_declarations() const override - { - return _components_required; - } - -private: - std::string build_storage_name(GpuTensorStorage x) const - { - std::string var_name = _basename; - - switch (x) - { - case GpuTensorStorage::BufferUint8Ptr: - return var_name + "_ptr"; - case GpuTensorStorage::Image2dReadOnly: - case GpuTensorStorage::Image2dWriteOnly: - return var_name + "_img2d"; - case GpuTensorStorage::Image3dReadOnly: - case GpuTensorStorage::Image3dWriteOnly: - return var_name + "_img3d"; - default: - std::cout << "Unsupported storage" << std::endl; - assert(false); - } - - return var_name; - } - - std::string build_component_name(TensorComponentType x) const - { - std::string var_name = _basename; - - switch (x) - { - case TensorComponentType::OffsetFirstElement: - 
return var_name + "_offset_first_element"; - case TensorComponentType::Stride1: - return var_name + "_stride1"; - case TensorComponentType::Stride2: - return var_name + "_stride2"; - case TensorComponentType::Stride3: - return var_name + "_stride3"; - case TensorComponentType::Dim0: - return var_name + "_dim0"; - case TensorComponentType::Dim1: - return var_name + "_dim1"; - case TensorComponentType::Dim2: - return var_name + "_dim2"; - case TensorComponentType::Dim3: - return var_name + "_dim3"; - case TensorComponentType::Dim1xDim2: - return var_name + "_dim1xdim2"; - case TensorComponentType::Dim1xDim2xDim3: - return var_name + "_dim1xdim2xdim3"; - default: - std::cout << "Unsupported component" << std::endl; - assert(false); - } - - return var_name; - } - - bool _return_by_value_when_possible{false}; - std::vector<GpuTensorStorage> _storage_required{}; - std::vector<TensorComponentType> _components_required{}; -}; - -/** - * @brief Data structure that contains the declared tiles by the components. - * The registry is a linear data structure that follows the similar principle of the stack. The user can use the @p increment_registry_level() method to - * increase the level of the stack (0 when it starts). When the user uses the @p decrement_registry_level() method, the registry decreases the level of the stack - * and remove (pop) all the tiles from the level above. - * When a tile is declared on the level 0, it is a global tile. A global tile is visible in all parts of the code. - * Since different components may use the same name to define a tile, the registry adopts the IdSpace concept, an @p id to prevent name collisions - * when declaring tiles among different components. 
- * - */ -class GpuTileRegistry -{ -public: - enum class RegistryTileType - { - Tile, - Link - }; - - using RegistryIdSpace = int32_t; - using RegistryLevel = int32_t; - using RegistryTileName = std::string; - - struct RegistryTileTableEntry - { - RegistryLevel registry_level{0}; - std::unique_ptr<IVectorTile> tile_object{nullptr}; - }; - - struct RegistryTileTypeTableEntry - { - RegistryTileType tile_type{RegistryTileType::Tile}; - RegistryTileName tile_name{}; - RegistryIdSpace registry_idspace{0}; - RegistryLevel registry_level{0}; - }; - - using RegistryTileTable = std::map<RegistryIdSpace, std::map<RegistryTileName, RegistryTileTableEntry>>; - using RegistryTileTypeTable = std::map<RegistryIdSpace, std::map<RegistryTileName, RegistryTileTypeTableEntry>>; - - /** - * @brief Construct a new Gpu Tile Registry object - * - */ - GpuTileRegistry() - { - _language = GpuTargetLanguage::Unknown; - } - - /** - * @brief Construct a new Gpu Tile Registry object providing the Gpu programming language - * - * @param[in] language Gpu programming language to use - */ - GpuTileRegistry(GpuTargetLanguage language) - { - _language = language; - } - - /** - * @brief Default destructor. Destroy the Gpu Tile Registry object - * - */ - ~GpuTileRegistry() = default; - - /** - * @brief Set the working IdSpace for the tile registry. IdSpace is used to prevent name collisions when declaring tiles. - * Therefore, the IdSpace should be set before declaring any tiles. - * - * @param[in] id The IdSpace id - */ - void set_IdSpace(int32_t id) - { - _IdSpace = id; - } - - /** - * @brief Get the current working IdSpace for the tile registry. IdSpace is used to prevent name collisions when declaring tiles - * - * @return The IdSpace id - */ - int32_t IdSpace() const - { - return _IdSpace; - } - - /** - * @brief Gets all the IdSpace declarations defined in the tile registry. - * - * @return all the IdSpace declarations defined in the tile registry as std::vector<int32_t>. 
It returns an empty vector if there are no IdSpace declarations. - */ - std::vector<int32_t> IdSpace_declarations() const - { - std::vector<int32_t> x; - - auto it = _frags.begin(); - - while (it != _frags.end()) - { - x.push_back(it->first); - - it++; - } - - return x; - } - - /** - * @brief Declare a tile from a previously created tile - */ - void insert(const std::string &name, const IVectorTile *frag) - { - assert(_language == GpuTargetLanguage::OpenCL); - const int32_t key_IdSpace = _IdSpace; - const std::string key_var_name = name; - const std::string var_name = frag->name(); - TileInfo format = frag->format(); - - // First check whether a tile with the same name exists - IVectorTile *result = (*this)[key_var_name]; - assert(result == nullptr); - if (result == nullptr) - { - std::unique_ptr<ClTile> tile = std::make_unique<ClTile>(var_name, format); - - _frags[key_IdSpace][key_var_name].tile_object = std::move(tile); - _frags[key_IdSpace][key_var_name].registry_level = _registry_level; - - _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Link; - _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name; - _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace; - _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level; - } - } - - /** - * @brief Declare a tile with TileInfo. The tile will be stored in the IdSpace set with @p set_IdSpace() - * - * @note The reference name used for declaring the tile should not be previously used in the IdSpace - * - * @param[in] name Reference name for the tile. The reference name can be used to retrieve the tile stored in the registry. 
- * @param[in] format Tile format use to use - */ - void insert(const std::string &name, const TileInfo &format) - { - assert(_language == GpuTargetLanguage::OpenCL); - const int32_t key_IdSpace = _IdSpace; - const std::string key_var_name = name; - const std::string var_name = generate_tile_name(name); - - // First check whether a tile with the same name exists - IVectorTile *result = (*this)[key_var_name]; - assert(result == nullptr); - if (result == nullptr) - { - std::unique_ptr<ClTile> tile = std::make_unique<ClTile>(var_name, format); - _frags[key_IdSpace][key_var_name].tile_object = std::move(tile); - _frags[key_IdSpace][key_var_name].registry_level = _registry_level; - - _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile; - _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name; - _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace; - _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level; - } - } - - /** - * @brief Declare a constant tile. The content of the tile is passed as a vector of std::string - * - * @note The reference name used for declaring the tile should not be previously used in the IdSpace - * - * @param[in] name Reference name for the tile. The reference name can be used to retrieve the tile stored in the registry. - * @param[in] in A 3D std::vector of std::string. From the 3D std::vector we can know the dimensions for the tile - * @param[in] dt The data type for the elements stored in the 3D std::vector as std::string. It is user's responsibilty to ensure - * that the data type is aligned with the content of the std::string. 
- */ - void insert(const std::string &name, const std::vector<std::vector<std::string>> &in, DataType dt) - { - assert(_language == GpuTargetLanguage::OpenCL); - const int32_t key_IdSpace = _IdSpace; - const std::string key_var_name = name; - - // First check whether a tile with the same name exists - IVectorTile *result = (*this)[key_var_name]; - assert(result == nullptr); - if (result == nullptr) - { - std::unique_ptr<ClConstantTile> tile = std::make_unique<ClConstantTile>(in, dt); - _frags[key_IdSpace][key_var_name].tile_object = std::move(tile); - _frags[key_IdSpace][key_var_name].registry_level = _registry_level; - - _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile; - _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name; - _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace; - _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level; - } - } - - /** - * @brief Declare an anonymous constant tile. The content of the tile is passed as a vector of std::string - * - * @note This method can be used to declare temporary tiles that need to be accessed only once. - * - * @param[in] in A 3D std::vector of std::string. From the 3D std::vector we can know the dimensions for the tile - * @param[in] dt The data type for the elements stored in the 3D std::vector as std::string. It is user responsibilty to ensure - * that the data type is aligned with what passed with the std::string. 
- * - * @return IVectorTile* the anonymous constant tile - */ - IVectorTile *insert(const std::vector<std::vector<std::string>> &in, DataType dt) - { - assert(_language == GpuTargetLanguage::OpenCL); - const int32_t key_IdSpace = _IdSpace; - const std::string key_var_name = "_" + std::to_string(_anonymous_frag_count++); - - // First check whether a tile with the same name exists - IVectorTile *result = (*this)[key_var_name]; - assert(result == nullptr); - if (result == nullptr) - { - std::unique_ptr<ClConstantTile> tile = std::make_unique<ClConstantTile>(in, dt); - _frags[key_IdSpace][key_var_name].tile_object = std::move(tile); - _frags[key_IdSpace][key_var_name].registry_level = _registry_level; - - _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile; - _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name; - _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace; - _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level; - } - - return (*this)[key_var_name]; - } - - /** - * @brief Get the tile from the registry. This method searches the tile in the IdSpace provided by the user - * - * @param[in] name The name of the tile to retrieve - * @param[in] IdSpace The IdSpace id where to search the tile - * - * @return IVectorTile* The tile - */ - IVectorTile *get(const std::string &name, int32_t IdSpace) - { - const int32_t key_IdSpace = IdSpace; - const std::string key_var_name = name; - - IVectorTile *result = nullptr; - auto search_IdSpace = _frags.find(key_IdSpace); - if (search_IdSpace != _frags.end()) - { - auto search_tile = _frags[key_IdSpace].find(key_var_name); - if (search_tile != _frags[key_IdSpace].end()) - { - result = search_tile->second.tile_object.get(); - assert(result != nullptr); - } - } - - return result; - } - - /** - * @brief Get the tile from the registry. 
This method searches the tile in the IdSpace set with @p set_IdSpace() - * - * @param[in] name The name of the tile to retrieve - * - * @return IVectorTile* The tile - */ - IVectorTile *operator[](const std::string &name) - { - return get(name, _IdSpace); - } - - /** - * @brief Check whether the tile in the in the IdSpace provided by the user exists - * - * @param[in] name Name of the tile to search for - * @param[in] IdSpace The IdSpace id where to search the tile - * - * @return true if the tile exists - * @return false if the tile does not exist - */ - bool has_tile(const std::string &name, int32_t IdSpace) const - { - const int32_t key_IdSpace = IdSpace; - const std::string key_var_name = name; - - // IVectorTile* result = nullptr; - auto search_IdSpace = _frags.find(key_IdSpace); - - return search_IdSpace != _frags.end(); - } - - /** - * @brief Check whether the tile within the current IdSpace exists - * - * @param[in] name Name of the tile to search for - * - * @return true if the tile exists - * @return false if the tile does not exist - */ - bool has_tile(const std::string &name) const - { - return has_tile(name, _IdSpace); - } - - /** - * @brief Get all the tiles declared within the IdSpace provided by the user - * - * @param[in] IdSpace IdSpace where to retrieve all the declared tiles - * - * @return std::vector<IVectorTile*> A vector with all the declared tiles in the IdSpace provided by the user - */ - std::vector<IVectorTile *> tile_declarations(int32_t IdSpace) - { - std::vector<IVectorTile *> tiles; - - std::map<RegistryTileName, RegistryTileTypeTableEntry>::iterator it = _frag_types[IdSpace].begin(); - - while (it != _frag_types[IdSpace].end()) - { - // The following line should be enabled. However, we cannot at this stage - // because it used to retrieve the output tile produced by each component. 
- // However, this method should NOT be used to retrieve the output tile - //if(it->second.tile_type == RegistryTileType::Tile) - { - tiles.push_back(get(it->second.tile_name, it->second.registry_idspace)); - } - it++; - } - - return tiles; - } - - /** - * @brief Increase the level of stack. - * - */ - void increment_registry_level() - { - _registry_level++; - } - - /** - * @brief Remove all the tiles declared at the current stack level and decrease the level of the stack. - * - */ - void decrement_registry_level() - { - assert(_registry_level >= 0); - - // Remove all variables in the local scope - std::map<RegistryTileName, RegistryTileTableEntry>::iterator it = _frags[_IdSpace].begin(); - - while (it != _frags[_IdSpace].end()) - { - if (it->second.registry_level == _registry_level) - { - it = _frags[_IdSpace].erase(it); - } - else - { - it++; - } - } - - std::map<RegistryTileName, RegistryTileTypeTableEntry>::iterator it_type = _frag_types[_IdSpace].begin(); - - while (it_type != _frag_types[_IdSpace].end()) - { - if (it_type->second.registry_level == _registry_level) - { - it_type = _frag_types[_IdSpace].erase(it_type); - } - else - { - it_type++; - } - } - - _registry_level--; - } - - /** - * @brief Get the level of the stack - * - */ - int32_t level() const - { - return _registry_level; - } - -private: - // This method ensures that the key is unique among different components - std::string generate_tile_name(const std::string &name) - { - assert(_IdSpace >= 0); - if (_registry_level == 0) - { - return "_G" + std::to_string(_IdSpace) + "_" + name; - } - else - { - return name; - } - } - - RegistryTileTable _frags{}; - RegistryTileTypeTable _frag_types{}; - RegistryLevel _registry_level{0}; - RegistryIdSpace _IdSpace{-1}; - int32_t _anonymous_frag_count{0}; // Counter used to create the anonymous tiles - GpuTargetLanguage _language{GpuTargetLanguage::Unknown}; // Gpu programming language -}; - -using TensorEntry = std::unique_ptr<IGpuTensorArgument>; - -/** - * 
@brief Data structure that contains the tensors consumed by the components. - * Since different components may use the same name as reference for a tensor, the registry adopts the IdSpace concept, an @p id to prevent name collisions - * when declaring tensors among different components. - * - */ -class GpuTensorArgumentRegistry -{ -public: - /** - * @brief Construct a new Gpu Tensor Registry object - * - */ - GpuTensorArgumentRegistry() - { - _language = GpuTargetLanguage::Unknown; - } - - /** - * @brief Construct a new Gpu Tensor Registry object - * - * @param[in] language Gpu programming language to use - */ - GpuTensorArgumentRegistry(GpuTargetLanguage language) - { - _language = language; - } - - /** - * @brief Default destructor. Destroy the Gpu Tensor Registry object - * - */ - ~GpuTensorArgumentRegistry() = default; - - /** - * @brief Set the working IdSpace for the tensor registry. IdSpace is used to prevent name collisions when declaring tensors. - * Therefore, the IdSpace should be set before declaring any tensors. - * - * @param[in] id The IdSpace id - */ - void set_IdSpace(int32_t id) - { - _IdSpace = id; - } - - /** - * @brief Get the current working IdSpace for the tensor registry. IdSpace is used to prevent name collisions when declaring tensors - * - * @return The IdSpace id - */ - int32_t IdSpace() const - { - return _IdSpace; - } - - /** - * @brief Gets all the IdSpace declarations defined in the tensor registry. - * - * @return all the IdSpace declarations defined in the tensor registry as std::vector<int32_t>. It returns an empty vector if there are no IdSpace declarations. - */ - std::vector<int32_t> IdSpace_declarations() const - { - std::vector<int32_t> x; - - auto it = _refs.begin(); - - while (it != _refs.end()) - { - x.push_back(it->first); - - it++; - } - - return x; - } - - /** - * @brief Declare a tensor with TensorInfo. 
The tensor will be stored in the IdSpace set with @p set_IdSpace() - * - * @note The reference name used for declaring the tensor should not be previously used in the IdSpace - * - * @param[in] name Reference name for the tensor. The reference name can be used to retrieve the tensor stored in the registry. - * @param[in] x Pair of tensor info and tensor id - * @param[in] return_by_value_when_possible True if we want the value stored in the tensor components - */ - void insert(const std::string &name, const TensorInfo &x, bool return_by_value_when_possible) - { - assert(_language == GpuTargetLanguage::OpenCL); - const int32_t key_IdSpace = _IdSpace; - const int32_t tensor_id = x.id; - const std::string key_var_name = name; - const std::string var_name = generate_tensor_name(name, tensor_id); - - // First, check whether the tensor has already a reference. If so, trigger an assert - assert(!has_tensor_argument(name)); - - // Check whether a tensor with that tensorID exists - auto result = _tensor_arguments.find(tensor_id); - if (result == _tensor_arguments.end()) - { - // It means that we haven't added a tensor with that tensor_id yet. Create a IGpuTensorArgument before creating the reference - std::unique_ptr<ClTensorArgument> arg = - std::make_unique<ClTensorArgument>(var_name, x, return_by_value_when_possible); - _tensor_arguments[tensor_id] = std::move(arg); - } - - _refs[key_IdSpace][key_var_name] = tensor_id; - } - - /** - * @brief Get the tensor from the registry. 
This method searches the tensor in the IdSpace set with @p set_IdSpace() - * - * @param[in] name The name of the tensor to retrieve - * - * @return IGpuTensor* The tensor - */ - IGpuTensorArgument *operator[](const std::string &name) - { - const int32_t key_IdSpace = _IdSpace; - const std::string key_var_name = name; - - IGpuTensorArgument *result = nullptr; - auto search_IdSpace = _refs.find(key_IdSpace); - if (search_IdSpace != _refs.end()) - { - auto search_tensor_id = _refs[key_IdSpace].find(key_var_name); - - if (search_tensor_id != _refs[key_IdSpace].end()) - { - const int32_t tensor_id = search_tensor_id->second; - auto search_tensor_argument = _tensor_arguments.find(tensor_id); - if (search_tensor_argument != _tensor_arguments.end()) - { - result = search_tensor_argument->second.get(); - } - assert(result != nullptr); - } - } - - return result; - } - - /** - * @brief Get all the tensors declared in the IdSpace provided by the user - * - * @return std::vector<IGpuTensorArgument*> A vector with all the declared tensors - */ - std::vector<IGpuTensorArgument *> tensor_argument_declarations() - { - std::vector<IGpuTensorArgument *> args; - - auto it = _tensor_arguments.begin(); - - while (it != _tensor_arguments.end()) - { - args.push_back(it->second.get()); - it++; - } - - return args; - } - - /** - * @brief Check whether the tensor argument in the IdSpace set with @p set_IdSpace() exists - * - * @param[in] name Name of the tensor argument to search for - * - * @return true if the tensor argument exists - * @return false if the tensor argument does not exist - */ - bool has_tensor_argument(const std::string &name) - { - const int32_t key_IdSpace = _IdSpace; - const std::string key_var_name = name; - - auto search_IdSpace = _refs.find(key_IdSpace); - - if (search_IdSpace != _refs.end()) - { - auto search_tensor_id = _refs[key_IdSpace].find(key_var_name); - - return search_tensor_id != _refs[key_IdSpace].end(); - } - else - { - return false; - } - } - - /** - * 
@brief Check whether the tensor argument is in the the IdSpace provided by the user - * - * @param[in] name Name of the tensor argument to search for - * @param[in] IdSpace The IdSpace id where to search the tensor argument - * - * @return true if the tile exists - * @return false if the tile does not exist - */ - bool has_tensor_argument(const std::string &name, int32_t IdSpace) - { - const int32_t key_IdSpace = IdSpace; - const std::string key_var_name = name; - - auto search_IdSpace = _refs.find(key_IdSpace); - - if (search_IdSpace != _refs.end()) - { - auto search_tensor_id = _refs[key_IdSpace].find(key_var_name); - - return search_tensor_id != _refs[key_IdSpace].end(); - } - else - { - return false; - } - } - -private: - // This method ensures that the key is unique among different components - std::string generate_tensor_name(const std::string &name, int32_t tensor_id) - { - assert(tensor_id >= 0); - - return name + std::to_string(tensor_id); - } - - std::map<int32_t, TensorEntry> _tensor_arguments{}; - std::map<int32_t, std::map<std::string, int32_t>> _refs{}; - int32_t _IdSpace{-1}; - GpuTargetLanguage _language{GpuTargetLanguage::Unknown}; // Gpu programming language -}; - -enum class OpType : int32_t -{ - Elementwise = 0x0000, - Relational = 0x1000, - Algebra = 0x2000 -}; - -inline std::string to_string(AssignmentOp op) -{ - switch (op) - { - case AssignmentOp::Decrement: - return "-="; - case AssignmentOp::Increment: - return "+="; - default: - assert(false); - return ""; - } -} - -inline std::string to_string(UnaryOp op) -{ - switch (op) - { - case UnaryOp::LogicalNot: - return "!"; - case UnaryOp::BitwiseNot: - return "~"; - case UnaryOp::Negate: - return "-"; - default: - assert(false); - return ""; - } -} - -inline std::string to_string(BinaryOp op) -{ - switch (op) - { - case BinaryOp::Add: - return "+"; - case BinaryOp::Sub: - return "-"; - case BinaryOp::Mul: - return "*"; - case BinaryOp::Div: - return "/"; - case BinaryOp::Mod: - return "%"; - 
case BinaryOp::Equal: - return "=="; - case BinaryOp::Less: - return "<"; - case BinaryOp::LessEqual: - return "<="; - case BinaryOp::Greater: - return ">"; - case BinaryOp::GreaterEqual: - return ">="; - case BinaryOp::LogicalAnd: - return "&&"; - case BinaryOp::LogicalOr: - return "||"; - case BinaryOp::BitwiseXOR: - return "^"; - default: - assert(false); - return ""; - } -} - -inline std::string binary_op_string(BinaryOp op) -{ - switch (op) - { - case BinaryOp::Add: - return "add"; - case BinaryOp::Sub: - return "sub"; - case BinaryOp::Mul: - return "mul"; - case BinaryOp::Div: - return "div"; - case BinaryOp::Mod: - return "mod"; - case BinaryOp::Equal: - return "eq"; - case BinaryOp::Less: - return "gt"; - case BinaryOp::LessEqual: - return "gteq"; - case BinaryOp::Greater: - return "lt"; - case BinaryOp::GreaterEqual: - return "lte"; - default: - assert(false); - return ""; - } -} - -enum class OperandType : int32_t -{ - Unknown = 0x00000000, - ScalarFp32 = 0x00001011, // Immediate scalar tile - ScalarFp16 = 0x00001012, // Immediate scalar tile - ScalarInt32 = 0x00001021, // Immediate scalar tile - ScalarInt16 = 0x00001022, // Immediate scalar tile - ScalarInt8 = 0x00001024, // Immediate scalar tile - ScalarUInt32 = 0x00001031, // Immediate scalar tile - ScalarUInt16 = 0x00001032, // Immediate scalar tile - ScalarUInt8 = 0x00001034, // Immediate scalar tile - ScalarBool = 0x00001041, // Immediate scalar tile - ScalarTile = 0x00001050, // Scalar from a tile - Tile = 0x00010000, // Tile - TensorStride1 = 0x00100001, // Tensor component - TensorStride2 = 0x00100002, // Tensor component - TensorStride3 = 0x00100003, // Tensor component - TensorStride4 = 0x00100004, // Tensor component - TensorDim0 = 0x00100010, // Tensor component - TensorDim1 = 0x00100020, // Tensor component - TensorDim2 = 0x00100030, // Tensor component - TensorDim3 = 0x00100040, // Tensor component - TensorDim4 = 0x00100050, // Tensor component - TensorC = 0x00100010, // Tensor component - 
TensorW = 0x00100020, // Tensor component - TensorH = 0x00100030, // Tensor component - TensorD = 0x00100040, // Tensor component - TensorN = 0x00100050, // Tensor component - TensorDim1xDim2 = 0x00100100, // Tensor component - TensorDim1xDim2xDim3 = 0x00100200, // Tensor component - TensorWxH = 0x00100300, // Tensor component - TensorWxHxD = 0x00100400, // Tensor component - TensorDataOffset = 0x00100500, // Tensor component -}; - -struct ScalarTileCoord -{ - ScalarTileCoord() - { - } - - ScalarTileCoord(int32_t x0, int32_t y0) : x(x0), y(y0) - { - } - - int32_t x{-1}; - int32_t y{-1}; -}; - -/** - * @brief Operand class. This object is used to pass the operands to the operations performed by the writer. - * Operand can be of three types: - * -# Scalar immediate: constant expression - * -# Tile: A tile - * -# Tensor component: A component (scalar) of a tensor - * - */ -class Operand -{ -public: - Operand(const std::string &val) - { - _str = val; - _type = OperandType::Tile; - } - - Operand(const std::string &val, const ScalarTileCoord &coord) - { - _str = val; - _type = OperandType::ScalarTile; - _coord = coord; - } - - Operand(const std::string &val, OperandType type) - { - _str = val; - _type = type; - } - - Operand(const Operand &t) - { - _str = t.value(); - _type = t.type(); - } - - Operand &operator=(const Operand &t) - { - _str = t.value(); - _type = t.type(); - _coord = t.scalar_tile_coordinate(); - return *this; - } - - std::string value() const - { - return _str; - } - - OperandType type() const - { - return _type; - } - - ScalarTileCoord scalar_tile_coordinate() const - { - return _coord; - } - -private: - std::string _str{}; - OperandType _type{OperandType::Unknown}; - ScalarTileCoord _coord{}; -}; - -using GpuSamplerTensorStorage = GpuTensorStorage; - -struct GpuSampler -{ - GpuSampler() = default; - - TensorSamplerFormat format{TensorSamplerFormat::Unknown}; - GpuSamplerTensorStorage storage{GpuSamplerTensorStorage::Unknown}; - 
TensorSamplerAddressModeX address_mode_x{TensorSamplerAddressModeX::Unknown}; - TensorSamplerAddressModeY address_mode_y{TensorSamplerAddressModeY::Unknown}; - TensorSamplerAddressModeZ address_mode_z{TensorSamplerAddressModeZ::Unknown}; -}; - -inline GpuSampler create_simple_sampler( - const TensorInfo *tensor_info_id, GpuSampler sampler, int32_t step_x, int32_t step_y, int32_t step_z) -{ - CKW_UNUSED(step_x, step_y, step_z); - - auto tensor = tensor_info_id->shape; - - GpuSampler dst_sampler; - dst_sampler.format = sampler.format; - dst_sampler.storage = GpuSamplerTensorStorage::BufferUint8Ptr; - dst_sampler.address_mode_x = sampler.address_mode_x; - dst_sampler.address_mode_y = sampler.address_mode_y; - dst_sampler.address_mode_z = sampler.address_mode_z; - - int32_t dim_x = 0; - int32_t dim_y = 0; - int32_t dim_z = 0; - - switch (sampler.format) - { - case TensorSamplerFormat::C_W_H: - dim_x = tensor[0]; - dim_y = tensor[1]; - dim_z = tensor[2]; - break; - case TensorSamplerFormat::C_WH_1: - dim_x = tensor[0]; - dim_y = tensor[1] * tensor[2]; - dim_z = 1; - break; - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - break; - } - - if (dim_x == 1) - { - assert(step_x == 1); - dst_sampler.address_mode_x = TensorSamplerAddressModeX::None; - } - - if (dim_y == 1) - { - assert(step_y == 1); - dst_sampler.address_mode_y = TensorSamplerAddressModeY::None; - } - - if (dim_z == 1) - { - assert(step_z == 1); - dst_sampler.address_mode_z = TensorSamplerAddressModeZ::None; - } - - return dst_sampler; -} - -class GpuOutputSampler -{ -public: - GpuOutputSampler() = default; - - /** - * @brief Method used to initialize the GpuOutputSampler. The GpuOutputSampler can be initialized only once - * by the root component. Once initialized, all simpler components will need to used this sampler - * or a broadcasted version of it - * - * @param[in] sampler GpuSampler - * @param[in] step_x Increment step in the X direction. 
Not necessarily it is the same of n0 of tile! - * @param[in] step_y Increment step in the Y direction. Not necessarily it is the same of m0 of tile! - * @param[in] step_z Increment step in the Z direction. Not necessarily it is the same of d0 of tile! - */ - void initialize(const TensorInfo *tensor_info_id, - GpuSamplerTensorStorage tensor_storage, - TensorSamplerFormat tensor_format, - int32_t step_x, - int32_t step_y, - int32_t step_z) - { - assert(_is_initialized == false); - - _step_x = step_x; - _step_y = step_y; - _step_z = step_z; - _tensor_info_id = tensor_info_id; - _sampler = create_sampler(tensor_storage, tensor_format); - _is_initialized = true; - }; - - GpuSampler sampler() const - { - return _sampler; - }; - - int32_t step_x() const - { - return _step_x; - }; - - int32_t step_y() const - { - return _step_y; - }; - - int32_t step_z() const - { - return _step_z; - }; - -private: - GpuSampler create_sampler(GpuSamplerTensorStorage tensor_storage, TensorSamplerFormat tensor_format) - { - // Output can only be in output mode - assert(tensor_storage != GpuSamplerTensorStorage::Image2dReadOnly); - assert(tensor_storage != GpuSamplerTensorStorage::Image3dReadOnly); - - auto tensor = _tensor_info_id->shape; - - GpuSampler sampler; - sampler.format = tensor_format; - sampler.storage = tensor_storage; - sampler.address_mode_x = TensorSamplerAddressModeX::None; - sampler.address_mode_y = TensorSamplerAddressModeY::None; - sampler.address_mode_z = TensorSamplerAddressModeZ::None; - - // In the case of texture, we do not need any special checks at the border - if (tensor_storage == GpuSamplerTensorStorage::BufferUint8Ptr) - { - int32_t dim_x = 0; - int32_t dim_y = 0; - int32_t dim_z = 0; - - switch (tensor_format) - { - case TensorSamplerFormat::C_W_H: - dim_x = tensor[0]; - dim_y = tensor[1]; - dim_z = tensor[2]; - break; - case TensorSamplerFormat::C_WH_1: - dim_x = tensor[0]; - dim_y = tensor[1] * tensor[2]; - dim_z = 1; - break; - default: - std::cout << 
"Unsupported tensor format" << std::endl; - assert(false); - break; - } - - if ((dim_x % _step_x) != 0 && dim_x != 1) - { - sampler.address_mode_x = TensorSamplerAddressModeX::OverlappingMin; - } - - if ((dim_y % _step_y) != 0 && dim_y != 1) - { - sampler.address_mode_y = TensorSamplerAddressModeY::ClampToMaxEdgeOnly; - } - - if ((dim_z % _step_z) != 0 && dim_z != 1) - { - sampler.address_mode_z = TensorSamplerAddressModeZ::ClampToMaxEdgeOnly; - } - } - - return sampler; - } - - GpuSampler _sampler{}; // GpuSampler - int32_t _step_x{1}; - int32_t _step_y{1}; - int32_t _step_z{1}; - const TensorInfo *_tensor_info_id{nullptr}; - bool _is_initialized{false}; -}; - -/** - * @brief Tensor operand class. This object is used to pass the operands as tensor to the operations performed by the writer. - */ -class TensorOperand -{ -public: - TensorOperand(const std::string &val, GpuSampler sampler) : _str(val), _sampler(sampler) - { - } - - TensorOperand &operator=(const TensorOperand &t) - { - _str = t.value(); - _sampler = t.sampler(); - return *this; - } - - std::string value() const - { - return _str; - } - - GpuSampler sampler() const - { - return _sampler; - } - -private: - std::string _str{}; - GpuSampler _sampler{}; -}; - -/** - * @brief Data structure that contains all the necessary information to write the Gpu kernel with the Gpu kernel Writer - * This data structure must be initialized before being passed to the Gpu Kernel Writer - * - */ -class GpuKernelWriterDataHolder -{ -public: - /** - * @brief Construct a new Gpu Kernel Data object. In this phase, we should also store - * the GPU target and target specific capabilities (extensions). 
For now, we just initialize the - * programming language - * - * @param[in] language Gpu programming language to use - */ - GpuKernelWriterDataHolder(GpuTargetLanguage language) - : tiles(language), arguments(language), code(""), _language(language) - { - } - - /** - * @brief Get the Gpu programming language used - * - * @return GpuTargetLanguage the Gpu programming language - */ - GpuTargetLanguage programming_language() const - { - return _language; - } - - /** - * @brief @ref GpuTileRegistry - * - */ - GpuTileRegistry tiles{}; - /** - * @brief @ref GpuTensorArgumentRegistry - * - */ - GpuTensorArgumentRegistry arguments{}; - /** - * @brief @ref GpuOutputSampler. - * - */ - GpuOutputSampler output_sampler{}; - /** - * @brief Source code - * - */ - std::string code{}; - - // GpuExtensionRegistry extensions{}; -private: - GpuTargetLanguage _language; -}; - -struct LWS -{ - int32_t x{1}; - int32_t y{1}; - int32_t z{1}; -}; - -/** - * @brief Utility class used to get the tile from the operand. If the operand is not a tile, @ref OperandUnpacker - * declare an anonymous tile in the tile registry. 
- */ -class OperandUnpacker -{ -public: - OperandUnpacker(GpuTileRegistry &tiles, GpuTensorArgumentRegistry &arguments) : _tiles(tiles), _arguments(arguments) - { - // Increase the level of the stack to allocate possible temporary tiles - _tiles.increment_registry_level(); - }; - - ~OperandUnpacker() - { - // Decrease the level of the stack to deallocate any temporary tiles - _tiles.decrement_registry_level(); - } - - IVectorTile *unpack(const Operand &src) - { - // Get the tile - if (src.type() == OperandType::Tile) - { - assert(_tiles.has_tile(src.value())); - return _tiles[src.value()]; - } - // Create an anonymous tile with a constant - else if (static_cast<int32_t>(src.type()) & 0x00001000) - { - if (src.type() == OperandType::ScalarTile) - { - ScalarTileCoord coord = src.scalar_tile_coordinate(); - assert(_tiles.has_tile(src.value())); - assert(coord.x >= 0); - assert(coord.y >= 0); - auto val = _tiles[src.value()]->scalar(coord.x, coord.y); - return _tiles.insert({{{val.str}}}, val.type.dt); - } - else - { - return _tiles.insert({{{src.value()}}}, to_tile_data_type(src.type())); - } - } - // Create an anonymous tile with the tensor component - else - { - assert(_arguments.has_tensor_argument(src.value())); - auto x = _arguments[src.value()]; - const std::string val = x->component(to_tensor_component(src.type())); - const DataType dt = x->component_data_type(); - return _tiles.insert({{{val}}}, dt); - } - } - -private: - DataType to_tile_data_type(OperandType x) - { - return static_cast<DataType>(static_cast<int32_t>(x) & 0x00ff); - } - - TensorComponentType to_tensor_component(OperandType x) - { - switch (x) - { - case OperandType::TensorDim0: - return TensorComponentType::Dim0; - case OperandType::TensorDim1: - return TensorComponentType::Dim1; - case OperandType::TensorDim2: - return TensorComponentType::Dim2; - case OperandType::TensorDim3: - return TensorComponentType::Dim3; - case OperandType::TensorDim4: - return TensorComponentType::Dim4; - case 
OperandType::TensorStride1: - return TensorComponentType::Stride1; - case OperandType::TensorStride2: - return TensorComponentType::Stride2; - case OperandType::TensorStride3: - return TensorComponentType::Stride3; - case OperandType::TensorStride4: - return TensorComponentType::Stride4; - case OperandType::TensorDim1xDim2: - return TensorComponentType::Dim1xDim2; - case OperandType::TensorDim1xDim2xDim3: - return TensorComponentType::Dim1xDim2xDim3; - case OperandType::TensorDataOffset: - return TensorComponentType::OffsetFirstElement; - default: - assert(false); - return TensorComponentType::Unknown; - } - } - - GpuTileRegistry &_tiles; - GpuTensorArgumentRegistry &_arguments; -}; - -/** - * @brief Utility class used to get the tensor argument from the operand. If the operand is not a tile, @ref OperandUnpacker - * declare an anonymous tile in the tile registry. - * Tensor dimension reduction aims for reducing the tensor data dimension while keeping data's tensor structure. - */ -class TensorOperandUnpacker -{ -public: - TensorOperandUnpacker(GpuTensorArgumentRegistry &arguments) : _arguments(arguments){}; - - IGpuTensorArgument *unpack(const TensorOperand &src) - { - assert(_arguments.has_tensor_argument(src.value())); - return _arguments[src.value()]; - } - -private: - GpuTensorArgumentRegistry &_arguments; -}; - -/** - * @brief The GpuKernel will be used in three occasions (stages): - * #- Compilation stage - * #- Tuning stage - * #- Dispatch stage - */ -struct GpuKernel -{ - // Compilation stage - std::string code{}; // Source code, required for the compilation stage - std::vector<GpuExtensions> list_extensions{}; // Extensions, required for the compilation stage - // Tuning stage - std::string config_id{}; // Unique id, required for the tuning stage - std::vector<LWS> list_lws{}; // LWS to test, required for the tuning stage - // Dispatch stage - GpuOutputSampler output_sampler{}; // GpuOutputSampler, required for the dispatch stage - 
std::vector<std::pair<int32_t, GpuTensorStorage>> - list_tensor_storages; // List of tensor storages, required for the dispatch stage - std::vector<std::pair<int32_t, TensorComponentType>> - list_tensor_components; // List of tensor components (width, stride,..), required for the dispatch stage) -}; - -// Generate all extension pragmas (hardcoded for now) -inline std::string generate_extensions() -{ - std::string ext = R"( -#if defined(cl_khr_fp16) -#pragma OPENCL EXTENSION cl_khr_fp16 : enable -#endif // defined(cl_khr_fp16) - -#if defined(cl_arm_integer_dot_product_int8) -#pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : enable -#endif // defined(cl_arm_integer_dot_product_int8) - -#if defined(cl_arm_integer_dot_product_accumulate_int8) -#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : enable -#endif // defined(cl_arm_integer_dot_product_accumulate_int8) - -#if defined(cl_arm_printf) -#pragma OPENCL EXTENSION cl_arm_printf : enable -#endif // defined(cl_arm_printf); -)"; - return ext; -} - -// This function should produce an object with the source -inline std::string generate_code(GpuKernelWriterDataHolder &in, const std::string &name) -{ - std::string code; - code += generate_extensions(); - code += "__kernel void "; - code += name; - code += "(\n"; - - auto IdSpaces = in.arguments.IdSpace_declarations(); - - std::vector<std::string> arg_str; - - auto tensor_args = in.arguments.tensor_argument_declarations(); - - for (auto &i : tensor_args) - { - // For each tensor used, get the storage and tensor components - auto storages = i->storage_declarations(); - auto components = i->component_declarations(); - - for (auto &y : storages) - { - std::string str; - str += i->storage_type_declaration(y); - str += " "; - str += i->storage(y); - arg_str.push_back(str); - } - - for (auto &y : components) - { - std::string str; - str += i->component_type_declaration(); - str += " "; - str += i->component(y); - arg_str.push_back(str); - } - } - - for 
(size_t i = 0; i < arg_str.size(); ++i) - { - code += arg_str[i]; - if (i + 1 < arg_str.size()) - { - code += ",\n"; - } - } - - code += ")\n"; - code += "{\n"; - code += in.code; - code += "}\n"; - - return code; -} - -/** - * @brief This class is responsible to map a N-Tensor to a 3d tensor. The mapper needs the GpuSampler to know - * how to reduce the dimensionality of a tensor - * - */ -class GpuTensor3dMapper -{ -public: - GpuTensor3dMapper(IGpuTensorArgument *tensor, GpuSampler sampler) : _sampler(sampler), _tensor(tensor){}; - - std::string tensor_component_x() const - { - const auto format = _sampler.format; - switch (format) - { - case TensorSamplerFormat::C_WH_1: - case TensorSamplerFormat::C_W_H: - return _tensor->component(TensorComponentType::Dim0); - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - return ""; - } - } - - std::string tensor_component_y() const - { - const auto format = _sampler.format; - switch (format) - { - case TensorSamplerFormat::C_WH_1: - return _tensor->component(TensorComponentType::Dim1xDim2); - case TensorSamplerFormat::C_W_H: - return _tensor->component(TensorComponentType::Dim1); - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - return ""; - } - } - - std::string tensor_component_z() const - { - const auto format = _sampler.format; - switch (format) - { - case TensorSamplerFormat::C_WH_1: - return "1"; - case TensorSamplerFormat::C_W_H: - return _tensor->component(TensorComponentType::Dim2); - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - return ""; - } - } - - std::string tensor_component_stride_y() const - { - const auto format = _sampler.format; - switch (format) - { - case TensorSamplerFormat::C_WH_1: - case TensorSamplerFormat::C_W_H: - return _tensor->component(TensorComponentType::Stride1); - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - return ""; - } - } - - 
std::string tensor_component_stride_z() const - { - const auto format = _sampler.format; - switch (format) - { - case TensorSamplerFormat::C_WH_1: - return "0"; - case TensorSamplerFormat::C_W_H: - return _tensor->component(TensorComponentType::Stride2); - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - return ""; - } - } - - std::string tensor_component_stride_batch() const - { - const auto format = _sampler.format; - switch (format) - { - case TensorSamplerFormat::C_WH_1: - case TensorSamplerFormat::C_W_H: - return _tensor->component(TensorComponentType::Stride3); - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - return ""; - } - } - - bool is_one_component_x() const - { - auto t = _tensor->format(); - const auto format = _sampler.format; - switch (format) - { - case TensorSamplerFormat::C_WH_1: - case TensorSamplerFormat::C_W_H: - return t.shape[0] == 1; - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - return ""; - } - } - - bool is_one_component_y() const - { - auto t = _tensor->format(); - const auto format = _sampler.format; - switch (format) - { - case TensorSamplerFormat::C_WH_1: - return (t.shape[1] * t.shape[2]) == 1; - case TensorSamplerFormat::C_W_H: - return t.shape[1] == 1; - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - return ""; - } - } - - bool is_one_component_z() const - { - auto t = _tensor->format(); - const auto format = _sampler.format; - switch (format) - { - case TensorSamplerFormat::C_WH_1: - return true; - case TensorSamplerFormat::C_W_H: - return t.shape[2] == 1; - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - return ""; - } - } - - bool is_one_component_batch() const - { - auto t = _tensor->format(); - const auto format = _sampler.format; - switch (format) - { - case TensorSamplerFormat::C_WH_1: - case TensorSamplerFormat::C_W_H: - return t.shape[3] 
== 1; - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - return ""; - } - } - - GpuSampler gpu_sampler() const - { - return _sampler; - } - - IGpuTensorArgument *tensor_argument() const - { - return _tensor; - } - -private: - GpuSampler _sampler; - IGpuTensorArgument *_tensor; -}; - -struct GpuKernelWriterAttribute -{ - bool return_tensor_component_by_value{false}; -}; - -enum class RoundingMode -{ - None, - Rte, - Rtz, - Rtp, - Rtn -}; - -// https://llvm.org/docs/tutorial/MyFirstLanguageFrontend/LangImpl05.html -class IGpuKernelWriter -{ -public: - virtual ~IGpuKernelWriter() = default; - - virtual void set_IdSpace(int32_t id) = 0; - - virtual void import_tile(const std::string &dst, const IVectorTile *src) = 0; - - virtual void declare_argument(const std::string &name, const TensorInfo &tensor) = 0; - - virtual void declare_tile(const std::string &name, const TileInfo &info) = 0; - - virtual void - declare_const_tile(const std::string &name, const std::vector<std::vector<std::string>> &in, DataType dt) = 0; - - virtual void write_text(const std::string &x) = 0; - - virtual void compound_statement_begin() = 0; - - virtual void compound_statement_end() = 0; - - // Operations - virtual void op_get_global_id(const Operand &dst_var, int32_t dim) = 0; - - virtual void - op_get_global_coord(const Operand &dst, const Operand &step, const TensorOperand &tensor, int32_t dim) = 0; - - virtual void op_get_global_batch(const Operand &dst, const TensorOperand &tensor) = 0; - - virtual void op_get_global_size(const Operand &dst_var, int32_t dim) = 0; - - virtual void op_unary_expression(const Operand &dst, UnaryOp op, const Operand &src) = 0; - - virtual void op_binary_expression(const Operand &dst, const Operand &lhs, BinaryOp op, const Operand &rhs) = 0; - - virtual void op_assign(const Operand &dst_name, const Operand &src_name) = 0; - - virtual void - op_unary_elementwise_function(const Operand &dst_name, UnaryFunction func, const Operand 
&src_name) = 0; - - virtual void op_binary_elementwise_function(const Operand &dst_name, - BinaryFunction func, - const Operand &first_name, - const Operand &second_name) = 0; - - virtual void op_ternary_elementwise_function(const Operand &dst_name, - TernaryFunction func, - const Operand &first_name, - const Operand &second_name, - const Operand &third_name) = 0; - - virtual void op_if_header(const Operand &lhs, BinaryOp op, const Operand &rhs) = 0; - - virtual void op_else_if_header(const Operand &lhs, BinaryOp op, const Operand &rhs) = 0; - - virtual void op_else_header() = 0; - - virtual void op_for_loop_header(const Operand &var_name, - BinaryOp cond_op, - const Operand &cond_value, - const Operand &update_var, - AssignmentOp update_op, - const Operand &update_value) = 0; - - virtual void op_load_indirect(const TensorOperand &tensor, - const Operand &dst, - const Operand &x, - const Operand &y_indirect, - const Operand &z, - const Operand &b = Operand("0", OperandType::ScalarInt32)) = 0; - - virtual void op_load_immediate(const TensorOperand &tensor, - const Operand &dst, - const Operand &x, - const Operand &y, - const Operand &z, - const Operand &b = Operand("0", OperandType::ScalarInt32), - const Operand &dilation_y = Operand("1", OperandType::ScalarInt32)) = 0; - - virtual void op_store_immediate(const TensorOperand &tensor, - const Operand &src, - const Operand &x, - const Operand &y, - const Operand &z, - const Operand &b = Operand("0", OperandType::ScalarInt32)) = 0; - - virtual void op_cast_expression(const Operand &dst, const Operand &src, ConvertPolicy policy) = 0; - - virtual void op_return() = 0; - - // Utils - // It is the process of converting - virtual void util_get_indirect_buffer(const Operand &dst, - const TensorOperand &tensor, - const Operand &x, - const Operand &y, - const Operand &x_off, - const Operand &y_off) = 0; -}; - -enum class GpuLoadStoreType -{ - Load = 1, - Store = 2 -}; - -class IGpuLoadStoreHelperWriter -{ -public: - 
IGpuLoadStoreHelperWriter(IGpuKernelWriter *x, GpuTensor3dMapper mapper, GpuLoadStoreType type) - : _writer(x), _mapper(mapper), _type(type) - { - } - - IGpuLoadStoreHelperWriter(const IGpuLoadStoreHelperWriter &) = default; - - IGpuLoadStoreHelperWriter &operator=(const IGpuLoadStoreHelperWriter &) = default; - - virtual ~IGpuLoadStoreHelperWriter() = default; - - virtual void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) = 0; - - virtual void write(const std::pair<int32_t, std::string> &y) = 0; - - virtual void finalize() = 0; - -protected: - IGpuKernelWriter *_writer; - GpuTensor3dMapper _mapper; - GpuLoadStoreType _type; -}; - -class ClLoadStoreBufferHelperWriter : public IGpuLoadStoreHelperWriter -{ -public: - ClLoadStoreBufferHelperWriter(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type) - : IGpuLoadStoreHelperWriter(x, mapper, type) - { - } - - ClLoadStoreBufferHelperWriter(const ClLoadStoreBufferHelperWriter &) = default; - - ClLoadStoreBufferHelperWriter &operator=(const ClLoadStoreBufferHelperWriter &) = default; - - static bool validate(IGpuKernelWriter *x, GpuTensor3dMapper mapper, GpuLoadStoreType type, IVectorTile *dst) - { - CKW_UNUSED(x, type, dst); - - if (mapper.gpu_sampler().storage != GpuSamplerTensorStorage::BufferUint8Ptr) - { - return false; - } - return true; - } - - void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) override - { - assert(validate(_writer, _mapper, _type, dst)); - - _dst = dst; - _ls_width_full = dst->format().w; - - _coord_x = x->scalar(0, 0).str; - _coord_z = z->scalar(0, 0).str; - _coord_b = b->scalar(0, 0).str; - _coord_orig_z = _coord_z; - - out_of_bound_initialize_x(_coord_x); - out_of_bound_initialize_z(_coord_z); - - /* - meaning of else: - - x: partial load/store - - y: no load/store operation - - z: no load/store operation - if(x) - { - if(z) - { - if(y) - { - // full load/store width - } - else - { - // no load/store - } - } - 
else - { - // no load/store - } - } - else - { - if(z) - { - if(y) - { - // partial load/store width - } - else - { - // no load/store - } - } - else - { - // no load/store - } - } - */ - } - - void write(const std::pair<int32_t, std::string> &y) override - { - int32_t idx_y = y.first; - std::string coord_y = y.second; - - // The only check required is on Y. - out_of_bound_initialize_y(coord_y); - - const std::string dst = _dst->vector(idx_y).str; - const std::string address = to_ls_buffer_address(_coord_x, coord_y, _coord_z, _coord_b); - const std::string ls_buf = to_ls_buffer(_type, _ls_width_full, dst, address); - - _writer->write_text(ls_buf); - _writer->write_text(";\n"); - - out_of_bound_finalize_y(dst); - - // The left over load/store will be written in the finalize stage - if (_ls_width_part.size() != 0) - { - int32_t w = 0; - for (auto &p : _ls_width_part) - { - const std::string dst0 = _dst->vector(w, p, idx_y).str; - const std::string coord_x = _coord_x + " + " + std::to_string(w); - const std::string address = to_ls_buffer_address(coord_x, coord_y, _coord_z, _coord_b); - const std::string ls_buf0 = to_ls_buffer(_type, p, dst0, address); - _leftovers_x.push_back(std::make_pair(std::make_pair(dst0, coord_y), ls_buf0)); - - w += p; - } - } - } - - void finalize() override - { - out_of_bound_finalize_z(); - out_of_bound_finalize_x(); - } - -private: - IVectorTile *_dst{nullptr}; - int32_t _ls_width_full{0}; - std::vector<int32_t> _ls_width_part{}; - std::vector<std::pair<std::pair<std::string, std::string>, std::string>> _leftovers_x{}; - std::string _coord_x{}; - std::string _coord_z{}; - std::string _coord_orig_z{}; - std::string _coord_b{}; - - void out_of_bound_initialize_x(std::string &coord) - { - if (_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin) - { - auto tensor_format = _mapper.tensor_argument()->format(); - auto shape = tensor_format.shape; - - _ls_width_part = decompose_leftover_ls_vector_width(shape[0] % 
_ls_width_full); - if (_ls_width_part.size() != 0) - { - _writer->write_text("if(" + coord + " > 0)\n"); - _writer->compound_statement_begin(); - } - } - }; - - void out_of_bound_finalize_x() - { - if (_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin) - { - if (_ls_width_part.size() != 0) - { - _writer->compound_statement_end(); - _writer->write_text("else\n"); - _writer->compound_statement_begin(); - - out_of_bound_initialize_z(_coord_orig_z); - for (auto &i : _leftovers_x) - { - out_of_bound_initialize_y(i.first.second); - _writer->write_text(i.second); - _writer->write_text(";\n"); - out_of_bound_finalize_y(i.first.first); - } - out_of_bound_finalize_z(); - _writer->compound_statement_end(); - } - } - }; - - void out_of_bound_initialize_y(std::string &coord) - { - std::string max = ""; - - const auto address_mode_y = _mapper.gpu_sampler().address_mode_y; - - switch (address_mode_y) - { - case TensorSamplerAddressModeY::Skip: - case TensorSamplerAddressModeY::ClampToBorder: - // NOTE: This line should not be moved outside of the switch statement. - // The reason for that is because when we query the component, the component is marked as used - // and added to the list of arguments of the kernel. 
Since, not in all cases this component is required, - // we should request the component only when used - max = _mapper.tensor_component_y(); - _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n"); - _writer->compound_statement_begin(); - break; - case TensorSamplerAddressModeY::SkipMinEdgeOnly: - case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly: - _writer->write_text("if(" + coord + " >= 0)\n"); - _writer->compound_statement_begin(); - break; - case TensorSamplerAddressModeY::SkipMaxEdgeOnly: - case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly: - max = _mapper.tensor_component_y(); - _writer->write_text("if(" + coord + " < " + max + ")\n"); - _writer->compound_statement_begin(); - break; - case TensorSamplerAddressModeY::ClampToNearest: - max = _mapper.tensor_component_y(); - coord = "clamp(" + coord + ", 0, " + max + " - 1)"; - break; - case TensorSamplerAddressModeY::ClampToMaxEdgeOnly: - max = _mapper.tensor_component_y(); - coord = "min(" + coord + ", " + max + " - 1)"; - break; - case TensorSamplerAddressModeY::ClampToMinEdgeOnly: - coord = "max(" + coord + ", 0)"; - break; - case TensorSamplerAddressModeY::None: - break; - default: - std::cout << "Unsupported address mode for write_out_of_bound_check_yz" << std::endl; - assert(false); - } - }; - - void out_of_bound_finalize_y(const std::string &dst) - { - const auto address_mode_y = _mapper.gpu_sampler().address_mode_y; - - switch (address_mode_y) - { - case TensorSamplerAddressModeY::ClampToBorder: - case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly: - case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly: - case TensorSamplerAddressModeY::Skip: - case TensorSamplerAddressModeY::SkipMaxEdgeOnly: - case TensorSamplerAddressModeY::SkipMinEdgeOnly: - _writer->compound_statement_end(); - break; - case TensorSamplerAddressModeY::None: - break; - - default: - assert(false); - } - - switch (address_mode_y) - { - case TensorSamplerAddressModeY::ClampToBorder: - 
case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly: - case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly: - _writer->write_text("else\n"); - _writer->compound_statement_begin(); - _writer->write_text(dst); - _writer->write_text(" = 0.0f;\n"); - _writer->compound_statement_end(); - break; - case TensorSamplerAddressModeY::None: - break; - - default: - assert(false); - } - }; - - void out_of_bound_initialize_z(std::string &coord) - { - std::string max = ""; - - const auto address_mode_z = _mapper.gpu_sampler().address_mode_z; - - switch (address_mode_z) - { - case TensorSamplerAddressModeZ::Skip: - max = _mapper.tensor_component_z(); - _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n"); - _writer->compound_statement_begin(); - break; - case TensorSamplerAddressModeZ::SkipMinEdgeOnly: - _writer->write_text("if(" + coord + " >= 0)\n"); - _writer->compound_statement_begin(); - break; - case TensorSamplerAddressModeZ::SkipMaxEdgeOnly: - max = _mapper.tensor_component_z(); - _writer->write_text("if(" + coord + " < " + max + ")\n"); - _writer->compound_statement_begin(); - break; - case TensorSamplerAddressModeZ::ClampToNearest: - max = _mapper.tensor_component_z(); - coord = "clamp(" + coord + ", 0, " + max + " - 1)"; - break; - case TensorSamplerAddressModeZ::ClampToMaxEdgeOnly: - max = _mapper.tensor_component_z(); - coord = "min(" + coord + ", " + max + " - 1)"; - break; - case TensorSamplerAddressModeZ::ClampToMinEdgeOnly: - coord = "max(" + coord + ", 0)"; - break; - case TensorSamplerAddressModeZ::None: - break; - default: - std::cout << "Unsupported address mode for write_out_of_bound_check_yz" << std::endl; - assert(false); - } - }; - - void out_of_bound_finalize_z() - { - const auto address_mode_z = _mapper.gpu_sampler().address_mode_z; - - switch (address_mode_z) - { - case TensorSamplerAddressModeZ::Skip: - case TensorSamplerAddressModeZ::SkipMinEdgeOnly: - case TensorSamplerAddressModeZ::SkipMaxEdgeOnly: - 
_writer->compound_statement_end(); - break; - case TensorSamplerAddressModeZ::None: - break; - - default: - assert(false); - } - }; - - std::vector<int32_t> decompose_leftover_ls_vector_width(int32_t ls_leftover_vector_width) const - { - std::vector<int32_t> x; - - switch (ls_leftover_vector_width) - { - case 0: - break; - case 1: - case 2: - case 3: - case 4: - case 8: - case 16: - x.push_back(ls_leftover_vector_width); - break; - case 5: - x.push_back(4); - x.push_back(1); - break; - case 6: - x.push_back(4); - x.push_back(2); - break; - case 7: - x.push_back(4); - x.push_back(3); - break; - case 9: - x.push_back(8); - x.push_back(1); - break; - case 10: - x.push_back(8); - x.push_back(2); - break; - case 11: - x.push_back(8); - x.push_back(3); - break; - case 12: - x.push_back(8); - x.push_back(4); - break; - case 13: - x.push_back(8); - x.push_back(4); - x.push_back(1); - break; - case 14: - x.push_back(8); - x.push_back(4); - x.push_back(2); - break; - case 15: - x.push_back(8); - x.push_back(4); - x.push_back(3); - break; - - default: - assert(false); - } - return x; - } - - std::string - to_ls_buffer(GpuLoadStoreType type, int32_t vector_width, const std::string &data, const std::string &address) - { - switch (type) - { - case GpuLoadStoreType::Load: - if (vector_width != 1) - { - return data + " = vload" + std::to_string(vector_width) + "(0, " + address + ")"; - } - else - { - return data + " = *(" + address + ")"; - } - break; - case GpuLoadStoreType::Store: - if (vector_width != 1) - { - return "vstore" + std::to_string(vector_width) + "(" + data + ", 0, " + address + ")"; - } - else - { - return "*(" + address + ") = " + data; - } - break; - default: - std::cout << "Unsupported GpuLoadStoreType" << std::endl; - assert(false); - return ""; - } - } - - std::string - to_ls_buffer_address(const std::string &x, const std::string &y, const std::string &z, const std::string &b) const - { - auto tensor_storage = 
static_cast<GpuTensorStorage>(_mapper.gpu_sampler().storage); - assert(tensor_storage == GpuTensorStorage::BufferUint8Ptr); - const std::string ptr_buf = _mapper.tensor_argument()->storage(tensor_storage); - const std::string dst_type = get_cl_data_type(_dst->format().dt, 1); - - std::string address; - address += "(__global "; - address += dst_type; - address += "*)("; - address += ptr_buf; - if (x != "0" && (_mapper.is_one_component_x() != true)) - { - address += " + ("; - address += x + ") * sizeof(" + dst_type + ")"; - } - if (y != "0") - { - const std::string stride_y = _mapper.tensor_component_stride_y(); - address += " + ("; - address += y + ")"; - address += " * "; - address += stride_y; - } - if (z != "0") - { - const std::string stride_z = _mapper.tensor_component_stride_z(); - address += " + ("; - address += z + ")"; - address += " * "; - address += stride_z; - } - if (b != "0" && (_mapper.is_one_component_batch() != true)) - { - const std::string stride_b = _mapper.tensor_component_stride_batch(); - address += " + ("; - address += b + ")"; - address += " * "; - address += stride_b; - } - address += ")"; - return address; - } -}; - -class ClLoadStoreImage2dHelperWriter : public IGpuLoadStoreHelperWriter -{ -public: - static bool validate(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type, IVectorTile *dst) - { - CKW_UNUSED(x); - - if (dst->format().w != 4) - { - return false; - } - if (mapper.gpu_sampler().address_mode_x != TensorSamplerAddressModeX::None) - { - return false; - } - if (mapper.gpu_sampler().address_mode_z != TensorSamplerAddressModeZ::None) - { - return false; - } - if (mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dReadOnly && type == GpuLoadStoreType::Load) - { - return false; - } - if (mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dWriteOnly && - type == GpuLoadStoreType::Store) - { - return false; - } - if ((dst->format().dt != DataType::Fp32) && (dst->format().dt != 
DataType::Fp16)) - { - return false; - } - return true; - /* - - x: Only GpuSamplerAddressModeX::None is supported and vector length = 4 - - z: Only GpuSamplerAddressModeZ::None is supported - */ - } - - ClLoadStoreImage2dHelperWriter(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type) - : IGpuLoadStoreHelperWriter(x, mapper, type) - { - } - - ClLoadStoreImage2dHelperWriter(const ClLoadStoreImage2dHelperWriter &) = default; - - ClLoadStoreImage2dHelperWriter &operator=(const ClLoadStoreImage2dHelperWriter &) = default; - - void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) override - { - assert(validate(_writer, _mapper, _type, dst)); - - _dst = dst; - _ls_width_full = dst->format().w; - _coord_x = x->scalar(0, 0).str; - _coord_z = z->scalar(0, 0).str; - _coord_b = b->scalar(0, 0).str; - - /* - if(y) - { - // full load/store width - } - else - { - // no load/store - } - */ - } - - void write(const std::pair<int32_t, std::string> &y) override - { - int32_t idx_y = y.first; - std::string coord_y = y.second; - - // The only check required is on Y. 
- out_of_bound_initialize_y(coord_y); - - const std::string dst = _dst->vector(idx_y).str; - const std::string sampler = to_ls_image2d_sampler(); - const std::string coord = to_ls_image2d_coord(_coord_x, coord_y, _coord_z, _coord_b); - const std::string ls_buf = to_ls_image2d(_type, _ls_width_full, dst, sampler, coord); - - _writer->write_text(ls_buf); - _writer->write_text(";\n"); - - out_of_bound_finalize_y(dst); - } - - void finalize() override - { - } - -private: - IVectorTile *_dst{nullptr}; - int32_t _ls_width_full{0}; - std::string _coord_x{}; - std::string _coord_z{}; - std::string _coord_b{}; - - void out_of_bound_initialize_y(std::string &coord) - { - std::string max = ""; - - const auto address_mode_y = _mapper.gpu_sampler().address_mode_y; - - switch (address_mode_y) - { - case TensorSamplerAddressModeY::Skip: - max = _mapper.tensor_component_y(); - _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n"); - _writer->compound_statement_begin(); - break; - case TensorSamplerAddressModeY::SkipMinEdgeOnly: - _writer->write_text("if(" + coord + " >= 0)\n"); - _writer->compound_statement_begin(); - break; - case TensorSamplerAddressModeY::SkipMaxEdgeOnly: - max = _mapper.tensor_component_y(); - _writer->write_text("if(" + coord + " < " + max + ")\n"); - _writer->compound_statement_begin(); - break; - case TensorSamplerAddressModeY::ClampToBorder: - case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly: - case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly: - case TensorSamplerAddressModeY::ClampToNearest: - case TensorSamplerAddressModeY::ClampToMaxEdgeOnly: - case TensorSamplerAddressModeY::ClampToMinEdgeOnly: - case TensorSamplerAddressModeY::None: - break; - default: - std::cout << "Unsupported address mode for write_out_of_bound_check_y" << std::endl; - assert(false); - } - }; - - void out_of_bound_finalize_y(const std::string &dst) - { - CKW_UNUSED(dst); - - const auto address_mode_y = 
_mapper.gpu_sampler().address_mode_y; - - switch (address_mode_y) - { - case TensorSamplerAddressModeY::Skip: - case TensorSamplerAddressModeY::SkipMinEdgeOnly: - case TensorSamplerAddressModeY::SkipMaxEdgeOnly: - _writer->compound_statement_end(); - break; - - default: - assert(false); - } - }; - - std::string to_ls_image2d(GpuLoadStoreType type, - int32_t vector_width, - const std::string &data, - const std::string &sampler, - const std::string &coord) - { - CKW_UNUSED(vector_width); - - auto tensor_storage = static_cast<GpuTensorStorage>(_mapper.gpu_sampler().storage); - const std::string image2d_obj = _mapper.tensor_argument()->storage(tensor_storage); - const std::string post_fix = _dst->format().dt == DataType::Fp32 ? "f" : "h"; - - switch (type) - { - case GpuLoadStoreType::Load: - return data + " = read_image" + post_fix + "(" + image2d_obj + ", " + sampler + ", " + coord + ")"; - break; - case GpuLoadStoreType::Store: - return "write_image" + post_fix + "(" + image2d_obj + ", " + coord + ", " + data + ")"; - default: - assert(false); - std::cout << "Unsupported GpuLoadStoreType" << std::endl; - assert(false); - return ""; - } - } - - std::string to_ls_image2d_sampler() const - { - const auto address_mode_y = _mapper.gpu_sampler().address_mode_y; - - switch (address_mode_y) - { - case TensorSamplerAddressModeY::None: - return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST"; - case TensorSamplerAddressModeY::Skip: - case TensorSamplerAddressModeY::SkipMinEdgeOnly: - case TensorSamplerAddressModeY::SkipMaxEdgeOnly: - case TensorSamplerAddressModeY::ClampToBorder: - case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly: - case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly: - return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST"; - case TensorSamplerAddressModeY::ClampToNearest: - case TensorSamplerAddressModeY::ClampToMaxEdgeOnly: - case TensorSamplerAddressModeY::ClampToMinEdgeOnly: - return 
"CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST"; - default: - std::cout << "Unsupported address_mode_coord" << std::endl; - assert(false); - return ""; - } - } - - std::string - to_ls_image2d_coord(const std::string &x, const std::string &y, const std::string &z, const std::string &b) const - { - std::string coord_x = "(" + x + ") >> 2"; - std::string coord_y = "("; - - if (y != "0") - { - coord_y += y; - } - if (z != "0" && (_mapper.is_one_component_z() != true)) - { - const std::string dim = _mapper.tensor_component_y(); - coord_y += " + ("; - coord_y += z + ")"; - coord_y += " * "; - coord_y += dim; - } - if (b != "0" && (_mapper.is_one_component_batch() != true)) - { - const std::string dim0 = _mapper.tensor_component_y(); - const std::string dim1 = _mapper.tensor_component_z(); - coord_y += " + ("; - coord_y += b + ")"; - coord_y += " * "; - coord_y += dim0; - coord_y += " * "; - coord_y += dim1; - } - coord_y += ")"; - return "(int2)(" + coord_x + ", " + coord_y + ")"; - } -}; - -/** IGpuLoadStoreHelperWriter factory class */ -class ClLoadStoreHelperWriterFactory final -{ -public: - /** Static method to call the IGpuLoadStoreHelperWriter class accordingly with the tensor storage set in the mapper - * - * - * @return IGpuLoadStoreHelperWriter - */ - static std::unique_ptr<IGpuLoadStoreHelperWriter> - create(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type) - { - const auto tensor_storage = mapper.gpu_sampler().storage; - switch (tensor_storage) - { - case GpuSamplerTensorStorage::BufferUint8Ptr: - return std::make_unique<ClLoadStoreBufferHelperWriter>(x, mapper, type); - case GpuSamplerTensorStorage::Image2dReadOnly: - case GpuSamplerTensorStorage::Image2dWriteOnly: - return std::make_unique<ClLoadStoreImage2dHelperWriter>(x, mapper, type); - default: - std::cout << "Unsupported Gpu tensor storage" << std::endl; - assert(false); - return nullptr; - } - } -}; - -// This utility method needs to go in 
utils.h -inline bool is_tile_scalar(const IVectorTile *x) -{ - return x->format().w == 1 && x->format().h == 1; -} - -class ClKernelWriter : public IGpuKernelWriter -{ -public: - ClKernelWriter(GpuKernelWriterAttribute *attr, GpuKernelWriterDataHolder *x) - { - _data = x; - _attr = attr; - } - - ClKernelWriter(const ClKernelWriter &) = default; - - ClKernelWriter &operator=(const ClKernelWriter &) = default; - - // A IdSpaced ID is a term used to describe a fragment that is registered in ICode to ensure - // there are no conflicts or ambiguity in the code - void set_IdSpace(int32_t id) override - { - _data->tiles.set_IdSpace(id); - _data->arguments.set_IdSpace(id); - } - - void import_tile(const std::string &dst_name, const IVectorTile *src) override - { - _data->tiles.insert(dst_name, src); - } - - void declare_argument(const std::string &name, const TensorInfo &tensor) override - { - assert(_data->arguments[name] == nullptr); - _data->arguments.insert(name, tensor, _attr->return_tensor_component_by_value); - } - - void declare_tile(const std::string &name, const TileInfo &format) override - { - assert(_data->tiles[name] == nullptr); - _data->tiles.insert(name, format); - - IVectorTile *x = _data->tiles[name]; - - for (auto &t : x->underlying_source_variables()) - { - _data->code += t.type.str + " " + t.str + ";\n"; - } - } - - void - declare_const_tile(const std::string &name, const std::vector<std::vector<std::string>> &in, DataType dt) override - { - assert(_data->tiles[name] == nullptr); - _data->tiles.insert(name, in, dt); - // Note: A constant does not need to be declared in the code - } - - void write_text(const std::string &x) override - { - _data->code += x; - } - - void compound_statement_begin() override - { - _data->tiles.increment_registry_level(); - _data->code += "{\n"; - } - - void compound_statement_end() override - { - _data->tiles.decrement_registry_level(); - _data->code += "}\n"; - } - - void op_get_global_id(const Operand &dst_var, int32_t 
dim) override - { - assert(dst_var.type() == OperandType::Tile); - assert(_data->tiles.has_tile(dst_var.value())); - assert(_data->tiles[dst_var.value()]->format().w == 1 && - _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable - - auto var = _data->tiles[dst_var.value()]; - - _data->code += var->scalar(0, 0).str; - _data->code += " = get_global_id("; - _data->code += std::to_string(dim); - _data->code += ");\n"; - }; - - void op_get_global_coord(const Operand &o_dst, - const Operand &o_step, - const TensorOperand &o_tensor, - int32_t dim) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - auto dst = operands.unpack(o_dst); - auto step = operands.unpack(o_step); - - // Validation: Check that x, y and z are scalar - - TensorOperandUnpacker tensor_operands(_data->arguments); - auto tensor = tensor_operands.unpack(o_tensor); - auto gpu_sampler = o_tensor.sampler(); - - GpuTensor3dMapper mapper(tensor, gpu_sampler); - - switch (dim) - { - case 0: - if (mapper.is_one_component_x()) - { - _data->code += dst->scalar(0, 0).str; - _data->code += " = 0;\n"; - } - else - { - if (mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin) - { - // Validation: Check: fixed tensor shape - // TO BE CHANGED - _data->code += dst->scalar(0, 0).str; - _data->code += " = get_global_id(0) * "; - _data->code += step->scalar(0, 0).str; - _data->code += ";\n"; - } - else - { - _data->code += dst->scalar(0, 0).str; - _data->code += " = get_global_id(0) * "; - _data->code += step->scalar(0, 0).str; - _data->code += ";\n"; - } - } - break; - case 1: - if (mapper.is_one_component_y()) - { - _data->code += dst->scalar(0, 0).str; - _data->code += " = 0;\n"; - } - else - { - if (mapper.gpu_sampler().address_mode_y == TensorSamplerAddressModeY::OverlappingMin) - { - } - else - { - _data->code += dst->scalar(0, 0).str; - _data->code += " = get_global_id(1) * "; - _data->code += step->scalar(0, 0).str; - _data->code += 
";\n"; - } - } - break; - case 2: - if (mapper.is_one_component_z()) - { - _data->code += dst->scalar(0, 0).str; - _data->code += " = 0;\n"; - } - else - { - _data->code += dst->scalar(0, 0).str; - _data->code += " = get_global_id(2) * "; - _data->code += step->scalar(0, 0).str; - _data->code += ";\n"; - } - break; - default: - break; - } - }; - - void op_get_global_batch(const Operand &o_dst, const TensorOperand &o_tensor) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - const IVectorTile *dst = operands.unpack(o_dst); - - TensorOperandUnpacker tensor_operands(_data->arguments); - IGpuTensorArgument *tensor = tensor_operands.unpack(o_tensor); - auto gpu_sampler = o_tensor.sampler(); - - GpuTensor3dMapper mapper(tensor, gpu_sampler); - - if (mapper.is_one_component_batch()) - { - _data->code += dst->scalar(0, 0).str; - _data->code += " = 0;\n"; - } - else - { - std::cout << "Unsupported batched computation" << std::endl; - assert(false); - } - }; - - void op_get_global_size(const Operand &dst_var, int32_t dim) override - { - assert(dst_var.type() == OperandType::Tile); - assert(_data->tiles.has_tile(dst_var.value())); - assert(_data->tiles[dst_var.value()]->format().w == 1 && - _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable - - auto var = _data->tiles[dst_var.value()]; - - _data->code += var->scalar(0, 0).str; - _data->code += " = get_global_size("; - _data->code += std::to_string(dim); - _data->code += ");\n"; - } - - void op_unary_expression(const Operand &dst_name, UnaryOp op, const Operand &src_name) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - const IVectorTile *src = operands.unpack(src_name); - const IVectorTile *dst = operands.unpack(dst_name); - - const int32_t dst_w = dst->format().w; - const int32_t dst_h = dst->format().h; - const int32_t src_w = src->format().w; - const std::string dt = dst->underlying_source_variables()[0].type.str; - - const bool broadcast_src_x 
= dst_w != 1 && src_w == 1; - - const std::string src_prefix = broadcast_src_x ? "(" + dt + ")" : ""; - - // Broadcasting on Y is automatic - for (int32_t y = 0; y < dst_h; ++y) - { - _data->code += dst->vector(y).str; - _data->code += " = "; - _data->code += to_string(op); - _data->code += src_prefix + src->vector(y).str; - _data->code += ";\n"; - } - } - - void op_binary_expression(const Operand &dst_name, - const Operand &lhs_name, - BinaryOp op, - const Operand &rhs_name) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - const IVectorTile *lhs = operands.unpack(lhs_name); - const IVectorTile *rhs = operands.unpack(rhs_name); - const IVectorTile *dst = operands.unpack(dst_name); - - const int32_t dst_w = dst->format().w; - const int32_t dst_h = dst->format().h; - assert(lhs != nullptr); - const int32_t lhs_w = lhs->format().w; - const int32_t rhs_w = rhs->format().w; - - if (op == BinaryOp::MatMul_Nt_T) - { - assert((dst->format().dt == DataType::Fp32) || (dst->format().dt == DataType::Fp16)); - for (int32_t y = 0; y < dst_h; ++y) - { - for (int32_t x = 0; x < dst_w; ++x) - { - for (int32_t k = 0; k < lhs_w; ++k) - { - _data->code += dst->scalar(x, y).str; - _data->code += " = fma("; - _data->code += lhs->scalar(k, y).str; - _data->code += ", "; - _data->code += rhs->scalar(k, x).str; - _data->code += ", "; - _data->code += dst->scalar(x, y).str; - _data->code += ");\n"; - } - } - } - - return; - } - - const bool broadcast_lhs_x = dst_w != 1 && lhs_w == 1; - const bool broadcast_rhs_x = dst_w != 1 && rhs_w == 1; - - const std::string lhs_prefix = - broadcast_lhs_x ? "(" + dst->underlying_source_variables()[0].type.str + ")" : ""; - const std::string rhs_prefix = - broadcast_rhs_x ? 
"(" + dst->underlying_source_variables()[0].type.str + ")" : ""; - const std::string op_str = to_string(op); - - // Broadcasting on Y is automatic - for (int32_t y = 0; y < dst_h; ++y) - { - _data->code += dst->vector(y).str; - _data->code += " = "; - _data->code += lhs_prefix + lhs->vector(y).str; - _data->code += " "; - _data->code += op_str; - _data->code += " "; - _data->code += rhs_prefix + rhs->vector(y).str; - _data->code += ";\n"; - } - }; - - void op_cast_expression(const Operand &o_dst, const Operand &o_src, ConvertPolicy policy) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - const IVectorTile *src = operands.unpack(o_src); - const IVectorTile *dst = operands.unpack(o_dst); - // const int32_t dst_w = dst->format().w; - const int32_t dst_h = dst->format().h; - const std::string dt = dst->underlying_source_variables()[0].type.str; - const bool is_float = (dst->format().dt == DataType::Fp32) || (dst->format().dt == DataType::Fp16); - const std::string sat = ((policy == ConvertPolicy::Saturate && !is_float) ? "_sat" : ""); - - // Broadcasting on Y is automatic - for (int32_t y = 0; y < dst_h; ++y) - { - _data->code += dst->vector(y).str; - _data->code += " = convert_" + dt + sat + "("; - _data->code += src->vector(y).str; - _data->code += ");\n"; - } - }; - - void op_assign(const Operand &dst_name, const Operand &src_name) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - const IVectorTile *src = operands.unpack(src_name); - const IVectorTile *dst = operands.unpack(dst_name); - - const int32_t dst_w = dst->format().w; - const int32_t dst_h = dst->format().h; - const int32_t src_w = src->format().w; - const std::string dt = dst->underlying_source_variables()[0].type.str; - - const bool broadcast_src_x = dst_w != 1 && src_w == 1; - - const std::string src_prefix = broadcast_src_x ? 
"(" + dt + ")" : ""; - - // Broadcasting on Y is automatic - for (int32_t y = 0; y < dst_h; ++y) - { - _data->code += dst->vector(y).str; - _data->code += " = "; - _data->code += src_prefix + src->vector(y).str; - _data->code += ";\n"; - } - } - - void op_unary_elementwise_function(const Operand &dst_name, UnaryFunction func, const Operand &src_name) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - const IVectorTile *src = operands.unpack(src_name); - const IVectorTile *dst = operands.unpack(dst_name); - - const int32_t dst_h = dst->format().h; - const std::string dt = dst->underlying_source_variables()[0].type.str; - - // Always perform an explicit cast. This automatically covers at least the 2 scenarios: - // 1. Widen a scalar into a vector type. This enables scalar-vector broadcasting - // 2. Ensure non-ambiguity over function overloads. - // E.g. a constant tile may be accidentally initialized with a double literal. By casting it to single float, - // it avoids ambiguous function calls - const std::string src_prefix = "(" + dt + ")"; - - // Broadcasting on Y is automatic - for (int32_t y = 0; y < dst_h; ++y) - { - _data->code += dst->vector(y).str; - _data->code += " = "; - - switch (func) - { - case UnaryFunction::Exp: - _data->code += "exp("; - break; - case UnaryFunction::Tanh: - _data->code += "tanh("; - break; - case UnaryFunction::Sqrt: - _data->code += "sqrt("; - break; - case UnaryFunction::Erf: - _data->code += "erf("; - break; - case UnaryFunction::Fabs: - _data->code += "fabs("; - break; - case UnaryFunction::Log: - _data->code += "log("; - break; - case UnaryFunction::SizeOf: - _data->code += "sizeof("; - break; - case UnaryFunction::Round: - _data->code += "round("; - break; - case UnaryFunction::Floor: - _data->code += "floor("; - break; - default: - CKW_ASSERT_MSG(false, "Unexpected UnaryFunction used."); - } - - _data->code += src_prefix + src->vector(y).str; - _data->code += ");\n"; - } - } - - void 
op_binary_elementwise_function(const Operand &dst_name, - BinaryFunction func, - const Operand &first_name, - const Operand &second_name) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - const IVectorTile *first = operands.unpack(first_name); - const IVectorTile *second = operands.unpack(second_name); - const IVectorTile *dst = operands.unpack(dst_name); - - const int32_t dst_h = dst->format().h; - const auto datatype = dst->underlying_source_variables()[0].type; - const std::string datatype_str = datatype.str; - - // Always perform an explicit cast. See similar comments in op_unary_elementwise_function - const std::string first_prefix = "(" + datatype_str + ")"; - const std::string second_prefix = "(" + datatype_str + ")"; - - const bool is_float = (datatype.dt == DataType::Fp32 || datatype.dt == DataType::Fp16); - - // Broadcasting on Y is automatic - for (int32_t y = 0; y < dst_h; ++y) - { - _data->code += dst->vector(y).str; - _data->code += " = "; - - switch (func) - { - case BinaryFunction::Min: - _data->code += is_float ? "fmin(" : "min("; - break; - case BinaryFunction::Max: - _data->code += is_float ? 
"fmax(" : "max("; - break; - default: - CKW_ASSERT_MSG(false, "Unexpected BinaryFunction used."); - } - - _data->code += first_prefix + first->vector(y).str; - _data->code += ", "; - _data->code += second_prefix + second->vector(y).str; - _data->code += ");\n"; - } - } - - void op_ternary_elementwise_function(const Operand &dst_name, - TernaryFunction func, - const Operand &first_name, - const Operand &second_name, - const Operand &third_name) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - const IVectorTile *first = operands.unpack(first_name); - const IVectorTile *second = operands.unpack(second_name); - const IVectorTile *third = operands.unpack(third_name); - const IVectorTile *dst = operands.unpack(dst_name); - - const int32_t dst_h = dst->format().h; - const std::string dt = dst->underlying_source_variables()[0].type.str; - - // Always perform an explicit cast. See similar comments in op_unary_elementwise_function - const std::string first_prefix = "(" + dt + ")"; - const std::string second_prefix = "(" + dt + ")"; - const std::string third_prefix = "(" + dt + ")"; - - // Broadcasting on Y is automatic - for (int32_t y = 0; y < dst_h; ++y) - { - _data->code += dst->vector(y).str; - _data->code += " = "; - - switch (func) - { - case TernaryFunction::Select: - _data->code += "select("; - break; - case TernaryFunction::Clamp: - _data->code += "clamp("; - break; - default: - CKW_ASSERT_MSG(false, "Unexpected TernaryFunction used."); - } - - _data->code += first_prefix + first->vector(y).str; - _data->code += ", "; - _data->code += second_prefix + second->vector(y).str; - _data->code += ", "; - _data->code += third_prefix + third->vector(y).str; - _data->code += ");\n"; - } - } - - void op_if_header(const Operand &o_lhs, BinaryOp op, const Operand &o_rhs) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - const IVectorTile *lhs = operands.unpack(o_lhs); - const IVectorTile *rhs = operands.unpack(o_rhs); - - 
assert(is_tile_scalar(lhs)); - assert(is_tile_scalar(rhs)); - - _data->code += "if("; - _data->code += lhs->scalar(0, 0).str; - _data->code += " "; - _data->code += to_string(op); - _data->code += " "; - _data->code += rhs->scalar(0, 0).str; - _data->code += ")\n"; - } - - void op_else_if_header(const Operand &o_lhs, BinaryOp op, const Operand &o_rhs) override - { - _data->code += "else "; - op_if_header(o_lhs, op, o_rhs); - } - - void op_else_header() override - { - _data->code += "else\n"; - } - - void op_for_loop_header(const Operand &var_name, - BinaryOp cond_op, - const Operand &cond_value_name, - const Operand &update_var_name, - AssignmentOp update_op, - const Operand &update_value_name) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - const IVectorTile *var = operands.unpack(var_name); - const IVectorTile *cond_value = operands.unpack(cond_value_name); - const IVectorTile *update_var = operands.unpack(update_var_name); - const IVectorTile *update_value = operands.unpack(update_value_name); - - const int32_t dst_w = var->format().w; - const int32_t dst_h = var->format().h; - - // It must be a scalar variable - CKW_UNUSED(dst_w, dst_h); - assert(dst_w == 1); - assert(dst_h == 1); - - _data->code += "for(; "; - _data->code += var->scalar(0, 0).str; - _data->code += " "; - _data->code += to_string(cond_op); - _data->code += " " + cond_value->scalar(0, 0).str + "; "; - _data->code += update_var->scalar(0, 0).str; - _data->code += " "; - _data->code += to_string(update_op); - _data->code += " " + update_value->scalar(0, 0).str + ")"; - _data->code += "\n"; - } - - void op_load_immediate(const TensorOperand &o_tensor, - const Operand &o_dst, - const Operand &o_x, - const Operand &o_y, - const Operand &o_z, - const Operand &o_batch_idx, - const Operand &dilation_y) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - - // Not const as it requires changes to 'load_writer'. 
- IVectorTile *dst = operands.unpack(o_dst); - IVectorTile *x = operands.unpack(o_x); - IVectorTile *y = operands.unpack(o_y); - IVectorTile *z = operands.unpack(o_z); - IVectorTile *dil_y = operands.unpack(dilation_y); - IVectorTile *b = operands.unpack(o_batch_idx); - - TensorOperandUnpacker tensor_operands(_data->arguments); - IGpuTensorArgument *tensor = tensor_operands.unpack(o_tensor); - auto gpu_sampler = o_tensor.sampler(); - - GpuTensor3dMapper mapper(tensor, gpu_sampler); - - auto load_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Load); - - // Initialize the constant part - load_writer->initialize(dst, x, z, b); - - for (int i = 0; i < dst->format().h; ++i) - { - std::string coord_y = y->scalar(0, 0).str + " + " + std::to_string(i); - if (dil_y->scalar(0, 0).str != "1") - { - coord_y += " * " + dil_y->scalar(0, 0).str; - } - load_writer->write(std::make_pair(i, coord_y)); - } - - load_writer->finalize(); - } - - void op_load_indirect(const TensorOperand &o_tensor, - const Operand &o_dst, - const Operand &o_x, - const Operand &o_indirect_h, - const Operand &o_z, - const Operand &o_batch_idx) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - - // Not const as it requires changes to 'load_writer'. 
- IVectorTile *dst = operands.unpack(o_dst); - IVectorTile *x = operands.unpack(o_x); - IVectorTile *y_ind = operands.unpack(o_indirect_h); - IVectorTile *z = operands.unpack(o_z); - IVectorTile *b = operands.unpack(o_batch_idx); - - TensorOperandUnpacker tensor_operands(_data->arguments); - IGpuTensorArgument *tensor = tensor_operands.unpack(o_tensor); - auto gpu_sampler = o_tensor.sampler(); - - GpuTensor3dMapper mapper(tensor, gpu_sampler); - - auto load_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Load); - - // Initialize the constant part - load_writer->initialize(dst, x, z, b); - - for (int i = 0; i < dst->format().h; ++i) - { - load_writer->write(std::make_pair(i, y_ind->scalar(0, i).str)); - } - - load_writer->finalize(); - } - - void op_store_immediate(const TensorOperand &tensor_name, - const Operand &src_name, - const Operand &x_name, - const Operand &y_name, - const Operand &z_name, - const Operand &batch_index_name) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - - // Not const as it requires changes to 'load_writer'. 
- IVectorTile *src = operands.unpack(src_name); - IVectorTile *x = operands.unpack(x_name); - IVectorTile *y = operands.unpack(y_name); - IVectorTile *z = operands.unpack(z_name); - IVectorTile *b = operands.unpack(batch_index_name); - - TensorOperandUnpacker tensor_operands(_data->arguments); - IGpuTensorArgument *tensor = tensor_operands.unpack(tensor_name); - auto gpu_sampler = tensor_name.sampler(); - - GpuTensor3dMapper mapper(tensor, gpu_sampler); - - auto store_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Store); - - // Initialize the constant part - store_writer->initialize(src, x, z, b); - - int32_t tile_h = src->format().h; - - for (int m0 = tile_h - 1; m0 >= 0; m0--) - { - store_writer->write(std::make_pair(m0, y->scalar(0, 0).str + " + " + std::to_string(m0))); - } - - store_writer->finalize(); - } - - void op_return() override - { - _data->code += "return;\n"; - } - - void util_get_indirect_buffer(const Operand &o_dst, - const TensorOperand &o_tensor, - const Operand &o_x, - const Operand &o_y, - const Operand &o_x_off, - const Operand &o_y_off) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - const IVectorTile *dst = operands.unpack(o_dst); - const IVectorTile *x = operands.unpack(o_x); - const IVectorTile *y = operands.unpack(o_y); - const IVectorTile *x_off = operands.unpack(o_x_off); - const IVectorTile *y_off = operands.unpack(o_y_off); - - TensorOperandUnpacker tensor_operands(_data->arguments); - IGpuTensorArgument *tensor = tensor_operands.unpack(o_tensor); - - assert(dst->format().w == 1); - assert(x->format().w == 1); - assert(y->format().w == 1); - assert(x_off->format().w == 1); - assert(y_off->format().w == 1); - assert(dst->format().dt == DataType::Int32); - assert(x->format().dt == DataType::Int32); - assert(y->format().dt == DataType::Int32); - assert(x_off->format().dt == DataType::Int32); - assert(y_off->format().dt == DataType::Int32); - - const std::string width = 
tensor->component(TensorComponentType::Dim1); - const std::string height = tensor->component(TensorComponentType::Dim2); - const std::string wxh = tensor->component(TensorComponentType::Dim1xDim2); - /* - int x_s; - int y_s; - x_s = (xi_0 + x_k); - y_s = (yi_0 + y_k); - mi_0 = x_s + y_s * width + b * widthxheight; - mi_0 = select(-1, mi_0, x_s >= 0); - mi_0 = select(-1, mi_0, y_s >= 0); - mi_0 = select(-1, mi_0, x_s < 128); - mi_0 = select(-1, mi_0, y_s < 128); - */ - compound_statement_begin(); - declare_tile("_x_s", TileInfo(DataType::Int32)); - declare_tile("_y_s", TileInfo(DataType::Int32)); - auto x_s = operands.unpack(Operand("_x_s")); - auto y_s = operands.unpack(Operand("_y_s")); - for (int i = 0; i < dst->format().h; ++i) - { - // x_s = (xi_0 + x_k); - // y_s = (yi_0 + y_k); - _data->code += x_s->scalar(0, i).str; - _data->code += " = ("; - _data->code += x->scalar(0, i).str; - _data->code += " + "; - _data->code += x_off->scalar(0, i).str; - _data->code += ");\n"; - _data->code += y_s->scalar(0, i).str; - _data->code += " = ("; - _data->code += y->scalar(0, i).str; - _data->code += " + "; - _data->code += y_off->scalar(0, i).str; - _data->code += ");\n"; - // mi_0 = x_s + y_s * width; - _data->code += dst->scalar(0, i).str; - _data->code += " = "; - _data->code += x_s->scalar(0, i).str; - _data->code += " + "; - _data->code += y_s->scalar(0, i).str; - _data->code += " * " + width + ";\n"; - // mi_0 = select(wxh, mi_0, x_s >= 0); - _data->code += dst->scalar(0, i).str; - _data->code += " = select(-1, "; - _data->code += dst->scalar(0, i).str; - _data->code += ", "; - _data->code += x_s->scalar(0, i).str; - _data->code += " >= 0);\n"; - // mi_0 = select(wxh, mi_0, x_s < width); - _data->code += dst->scalar(0, i).str; - _data->code += " = select(-1, "; - _data->code += dst->scalar(0, i).str; - _data->code += ", "; - _data->code += x_s->scalar(0, i).str; - _data->code += " < "; - _data->code += width + ");\n"; - // mi_0 = select(wxh, mi_0, y_s >= 0); - 
_data->code += dst->scalar(0, i).str; - _data->code += " = select(-1, "; - _data->code += dst->scalar(0, i).str; - _data->code += ", "; - _data->code += y_s->scalar(0, i).str; - _data->code += " >= 0);\n"; - // mi_0 = select(wxh, mi_0, y_s < height); - _data->code += dst->scalar(0, i).str; - _data->code += " = select(-1, "; - _data->code += dst->scalar(0, i).str; - _data->code += ", "; - _data->code += y_s->scalar(0, i).str; - _data->code += " < "; - _data->code += height + ");\n"; - } - compound_statement_end(); - } - -private: - GpuKernelWriterDataHolder *_data{nullptr}; - GpuKernelWriterAttribute *_attr{nullptr}; -}; - -/** IGpuKernelWriter factory class */ -class GpuKernelWriterFactory final -{ -public: - /** Static method to call the IGpuKernelWriter class accordingly with the Gpu programming language - * - * @param[in] gpu GPU target - * - * @return IGpuKernelWriter - */ - static std::unique_ptr<IGpuKernelWriter> create(GpuKernelWriterAttribute *attr, GpuKernelWriterDataHolder *x) - { - switch (x->programming_language()) - { - case GpuTargetLanguage::OpenCL: - return std::make_unique<ClKernelWriter>(attr, x); - default: - std::cout << "Unsupported Gpu programming language" << std::endl; - assert(false); - return nullptr; - } - } -}; - -inline int32_t -adjust_step(TensorSamplerFormat tensor_format, int32_t step, const TensorInfo *tensor_info_id, int32_t idx) -{ - auto tensor = tensor_info_id->shape; - - int32_t dim[3] = {0}; - - switch (tensor_format) - { - case TensorSamplerFormat::C_W_H: - dim[0] = tensor[0]; - dim[1] = tensor[1]; - dim[2] = tensor[2]; - break; - case TensorSamplerFormat::C_WH_1: - dim[0] = tensor[0]; - dim[1] = tensor[1] * tensor[2]; - dim[2] = 1; - break; - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - break; - } - - return std::min(step, dim[idx]); -} - -} // namespace prototype -} // namespace ckw - -#endif // CKW_PROTOTYPE_SRC_PROTOTYPE_H diff --git 
a/compute_kernel_writer/prototype/src/TensorInfo.cpp b/compute_kernel_writer/prototype/src/TensorInfo.cpp deleted file mode 100644 index 561c126469..0000000000 --- a/compute_kernel_writer/prototype/src/TensorInfo.cpp +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "ckw/TensorInfo.h" - -namespace ckw -{ -TensorInfo::TensorInfo(DataType dt, const TensorShape &shape, TensorDataLayout dl, int32_t id) - : _shape(shape), _dt(dt), _dl(dl), _id(id) -{ -} - -TensorInfo &TensorInfo::shape(const TensorShape &shape) -{ - _shape = shape; - return *this; -} - -TensorShape TensorInfo::shape() const -{ - return _shape; -} - -TensorInfo &TensorInfo::data_type(DataType dt) -{ - _dt = dt; - return *this; -} - -DataType TensorInfo::data_type() const -{ - return _dt; -} - -TensorInfo &TensorInfo::data_layout(TensorDataLayout dl) -{ - _dl = dl; - return *this; -} - -TensorDataLayout TensorInfo::data_layout() const -{ - return _dl; -} - -TensorInfo &TensorInfo::id(int32_t id) -{ - _id = id; - return *this; -} - -int32_t TensorInfo::id() const -{ - return _id; -} -} // namespace ckw diff --git a/compute_kernel_writer/prototype/src/TensorOperand.cpp b/compute_kernel_writer/prototype/src/TensorOperand.cpp deleted file mode 100644 index d1aefbbb71..0000000000 --- a/compute_kernel_writer/prototype/src/TensorOperand.cpp +++ /dev/null @@ -1,272 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "ckw/TensorOperand.h" - -#include "ckw/Error.h" -#include "ckw/Kernel.h" -#include "ckw/TensorInfo.h" -#include "ckw/TileOperand.h" - -#include "src/Prototype.h" - -namespace ckw -{ - -namespace -{ - -TensorComponentOperand &get_or_create_component(TensorOperand &tensor, - std::unique_ptr<TensorComponentOperand> &ptr, - TensorComponentType component) -{ - if (ptr == nullptr) - { - ptr = std::make_unique<TensorComponentOperand>(tensor, component); - } - - return *ptr; -} - -} // namespace - -// ================================================================================================= -// TensorOperand -// ================================================================================================= - -TensorOperand::TensorOperand(const std::string &name, const TensorInfo &info, TensorStorageType storage_type) - : OperandBase(name), _info(info), _storage_type(storage_type) -{ -} - -prototype::Operand TensorOperand::create_impl_operand(prototype::IGpuKernelWriter *writer) const -{ - CKW_UNUSED(writer); - return {name()}; -} - -const TensorInfo &TensorOperand::info() const -{ - return _info; -} - -TensorInfo &TensorOperand::info() -{ - return _info; -} - -TensorStorageType TensorOperand::storage_type() const -{ - return _storage_type; -} - -DataType TensorOperand::data_type() const -{ - return _info.data_type(); -} - -bool TensorOperand::is_constant() const -{ - return false; -} - -const TileOperand &TensorOperand::tile() const -{ - return *_tile; -} - -TileOperand &TensorOperand::tile() -{ - return *_tile; -} - -TensorOperand &TensorOperand::tile(TileOperand &tile) -{ - _tile = &tile; - return *this; -} - -const TensorTileSampler &TensorOperand::tile_sampler() const -{ - 
return _tile_sampler; -} - -TensorTileSampler &TensorOperand::tile_sampler() -{ - return _tile_sampler; -} - -TensorOperand &TensorOperand::tile_sampler(const TensorTileSampler &value) -{ - _tile_sampler = value; - return *this; -} - -TensorComponentOperand &TensorOperand::stride1() -{ - return get_or_create_component(*this, _stride1, TensorComponentType::Stride1); -} - -TensorComponentOperand &TensorOperand::stride2() -{ - return get_or_create_component(*this, _stride2, TensorComponentType::Stride2); -} - -TensorComponentOperand &TensorOperand::stride3() -{ - return get_or_create_component(*this, _stride3, TensorComponentType::Stride3); -} - -TensorComponentOperand &TensorOperand::stride4() -{ - return get_or_create_component(*this, _stride4, TensorComponentType::Stride4); -} - -TensorComponentOperand &TensorOperand::dim0() -{ - return get_or_create_component(*this, _dim0, TensorComponentType::Dim0); -} - -TensorComponentOperand &TensorOperand::dim1() -{ - return get_or_create_component(*this, _dim1, TensorComponentType::Dim1); -} - -TensorComponentOperand &TensorOperand::dim2() -{ - return get_or_create_component(*this, _dim2, TensorComponentType::Dim2); -} - -TensorComponentOperand &TensorOperand::dim3() -{ - return get_or_create_component(*this, _dim3, TensorComponentType::Dim3); -} - -TensorComponentOperand &TensorOperand::dim4() -{ - return get_or_create_component(*this, _dim4, TensorComponentType::Dim4); -} - -TensorComponentOperand &TensorOperand::dim1_dim2() -{ - return get_or_create_component(*this, _dim1_dim2, TensorComponentType::Dim1xDim2); -} - -TensorComponentOperand &TensorOperand::dim1_dim2_dim3() -{ - return get_or_create_component(*this, _dim1_dim2_dim3, TensorComponentType::Dim1xDim2xDim3); -} - -TensorComponentOperand &TensorOperand::offset_first_element_in_bytes() -{ - return get_or_create_component(*this, _offset_first_element_in_bytes, TensorComponentType::OffsetFirstElement); -} - -// 
================================================================================================= -// TensorComponentOperand -// ================================================================================================= - -TensorComponentOperand::TensorComponentOperand(TensorOperand &tensor, TensorComponentType component) - : TileOperand(tensor.name(), DataType::Int32), _tensor(tensor), _component(component) -{ -} - -TensorOperand &TensorComponentOperand::tensor() -{ - return _tensor; -} - -const TensorOperand &TensorComponentOperand::tensor() const -{ - return _tensor; -} - -TensorComponentType TensorComponentOperand::component_type() const -{ - return _component; -} - -prototype::Operand TensorComponentOperand::create_impl_operand(prototype::IGpuKernelWriter *writer) const -{ - CKW_UNUSED(writer); - prototype::OperandType type{prototype::OperandType::Unknown}; - - switch (_component) - { - case TensorComponentType::OffsetFirstElement: - type = prototype::OperandType::TensorDataOffset; - break; - - case TensorComponentType::Stride1: - type = prototype::OperandType::TensorStride1; - break; - - case TensorComponentType::Stride2: - type = prototype::OperandType::TensorStride2; - break; - - case TensorComponentType::Stride3: - type = prototype::OperandType::TensorStride3; - break; - - case TensorComponentType::Stride4: - type = prototype::OperandType::TensorStride4; - break; - - case TensorComponentType::Dim0: - type = prototype::OperandType::TensorDim0; - break; - - case TensorComponentType::Dim1: - type = prototype::OperandType::TensorDim1; - break; - - case TensorComponentType::Dim2: - type = prototype::OperandType::TensorDim2; - break; - - case TensorComponentType::Dim3: - type = prototype::OperandType::TensorDim3; - break; - - case TensorComponentType::Dim4: - type = prototype::OperandType::TensorDim4; - break; - - case TensorComponentType::Dim1xDim2: - type = prototype::OperandType::TensorDim1xDim2; - break; - - case TensorComponentType::Dim1xDim2xDim3: - 
type = prototype::OperandType::TensorDim1xDim2xDim3; - break; - - default: - CKW_ASSERT(false); - } - - return prototype::Operand(name(), type); -} - -} // namespace ckw diff --git a/compute_kernel_writer/prototype/src/TensorTileSampler.cpp b/compute_kernel_writer/prototype/src/TensorTileSampler.cpp deleted file mode 100644 index bf9f946ce8..0000000000 --- a/compute_kernel_writer/prototype/src/TensorTileSampler.cpp +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "ckw/TensorTileSampler.h" - -#include "ckw/TileOperand.h" -#include "ckw/types/TensorSamplerTypes.h" - -namespace ckw -{ - -TensorTileSampler::TensorTileSampler() -{ -} - -TensorTileSampler::TensorTileSampler(TileOperand &x, - TileOperand &y, - TileOperand &z, - TileOperand &b, - TensorSamplerFormat format, - TensorSamplerAddressModeX address_mode_x, - TensorSamplerAddressModeY address_mode_y, - TensorSamplerAddressModeZ address_mode_z) - : _x(&x), - _y(&y), - _z(&z), - _b(&b), - _height(0), - _width(0), - _format(format), - _address_mode_x(address_mode_x), - _address_mode_y(address_mode_y), - _address_mode_z(address_mode_z) -{ -} - -TensorTileSampler::TensorTileSampler(TileOperand &x, - TileOperand &y, - TileOperand &z, - TileOperand &b, - int32_t height, - int32_t width, - TensorSamplerFormat format, - TensorSamplerAddressModeX address_mode_x, - TensorSamplerAddressModeY address_mode_y, - TensorSamplerAddressModeZ address_mode_z) - : _x(&x), - _y(&y), - _z(&z), - _b(&b), - _height(height), - _width(width), - _format(format), - _address_mode_x(address_mode_x), - _address_mode_y(address_mode_y), - _address_mode_z(address_mode_z) -{ -} - -const TileOperand &TensorTileSampler::x() const -{ - return *_x; -} - -TensorTileSampler &TensorTileSampler::x(TileOperand &x) -{ - _x = &x; - return *this; -} - -const TileOperand &TensorTileSampler::y() const -{ - return *_y; -} - -TensorTileSampler &TensorTileSampler::y(TileOperand &y) -{ - _y = &y; - return *this; -} - -const TileOperand &TensorTileSampler::z() const -{ - return *_z; -} - -TensorTileSampler &TensorTileSampler::z(TileOperand &z) -{ - _z = &z; - return *this; -} - -const TileOperand &TensorTileSampler::b() const -{ - return *_b; -} - -TensorTileSampler &TensorTileSampler::b(TileOperand &b) -{ - _b = &b; - return *this; -} - -int32_t TensorTileSampler::width() const -{ - return _width; -} - -TensorTileSampler &TensorTileSampler::width(int32_t width) -{ - _width = width; - return *this; -} - 
-int32_t TensorTileSampler::height() const -{ - return _height; -} - -TensorTileSampler &TensorTileSampler::height(int32_t height) -{ - _height = height; - return *this; -} - -TensorSamplerFormat TensorTileSampler::format() const -{ - return _format; -} - -TensorTileSampler &TensorTileSampler::format(TensorSamplerFormat format) -{ - _format = format; - return *this; -} - -TensorSamplerAddressModeX TensorTileSampler::address_mode_x() const -{ - return _address_mode_x; -} - -TensorTileSampler &TensorTileSampler::address_mode_x(TensorSamplerAddressModeX address_mode_x) -{ - _address_mode_x = address_mode_x; - return *this; -} - -TensorSamplerAddressModeY TensorTileSampler::address_mode_y() const -{ - return _address_mode_y; -} - -TensorTileSampler &TensorTileSampler::address_mode_y(TensorSamplerAddressModeY address_mode_y) -{ - _address_mode_y = address_mode_y; - return *this; -} - -TensorSamplerAddressModeZ TensorTileSampler::address_mode_z() const -{ - return _address_mode_z; -} - -TensorTileSampler &TensorTileSampler::address_mode_z(TensorSamplerAddressModeZ address_mode_z) -{ - _address_mode_z = address_mode_z; - return *this; -} - -} // namespace ckw diff --git a/compute_kernel_writer/prototype/src/TileInfo.cpp b/compute_kernel_writer/prototype/src/TileInfo.cpp deleted file mode 100644 index 273266eedc..0000000000 --- a/compute_kernel_writer/prototype/src/TileInfo.cpp +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "ckw/TileInfo.h" - -namespace ckw -{ -TileInfo::TileInfo(DataType dt) : _dt(dt), _shape({{1, 1}}) -{ -} - -TileInfo::TileInfo(DataType dt, int32_t w) : _dt(dt), _shape({{w, 1}}) -{ -} - -TileInfo::TileInfo(DataType dt, int32_t h, int32_t w) : _dt(dt), _shape({{w, h}}) -{ -} - -TileInfo &TileInfo::width(int32_t w) -{ - _shape[kTileWidthIdx] = w; - return *this; -} - -int32_t TileInfo::width() const -{ - return _shape[kTileWidthIdx]; -} - -TileInfo &TileInfo::height(int32_t h) -{ - _shape[kTileHeightIdx] = h; - return *this; -} - -int32_t TileInfo::height() const -{ - return _shape[kTileHeightIdx]; -} - -TileInfo &TileInfo::data_type(DataType dt) -{ - _dt = dt; - return *this; -} - -DataType TileInfo::data_type() const -{ - return _dt; -} -} // namespace ckw diff --git a/compute_kernel_writer/prototype/src/TileOperand.cpp b/compute_kernel_writer/prototype/src/TileOperand.cpp deleted file mode 100644 index e09c833d96..0000000000 --- a/compute_kernel_writer/prototype/src/TileOperand.cpp +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "ckw/TileOperand.h" - -#include "ckw/Error.h" - -#include "src/Prototype.h" - -namespace ckw -{ - -TileOperand::TileOperand(const std::string &name, const TileInfo &info) - : OperandBase(name), _info(info), _value{std::vector<std::string>{"0"}}, _constant(false) -{ -} - -TileOperand::TileOperand(const std::string &name, DataType data_type) - : OperandBase(name), _info(TileInfo{data_type}), _value{std::vector<std::string>{"0"}}, _constant(false) -{ -} - -TileOperand::TileOperand(const std::string &name, int32_t value) - : OperandBase(name), - _info(TileInfo{DataType::Int32}), - _value{std::vector<std::string>{std::to_string(value)}}, - _constant(true) -{ -} - -TileOperand::TileOperand(const std::string &name, float value) - : OperandBase(name), - _info(TileInfo{DataType::Fp32}), - _value{std::vector<std::string>{std::to_string(value)}}, - _constant(true) -{ -} - -TileOperand::TileOperand(const std::string &name, const TileContainer &vals, DataType dt) - : OperandBase(name), - _info(TileInfo{dt, static_cast<int32_t>(vals.size()), static_cast<int32_t>(vals[0].size())}), - _value(vals), - _constant(true) -{ -} - -prototype::Operand TileOperand::create_impl_operand(prototype::IGpuKernelWriter *writer) const -{ - CKW_UNUSED(writer); - - if (_constant) - { - if (is_scalar()) - { - switch (_info.data_type()) - { - case DataType::Int32: - return prototype::Operand(_value[0][0], prototype::OperandType::ScalarInt32); - - case DataType::Fp32: - return prototype::Operand(_value[0][0], prototype::OperandType::ScalarFp32); - - case DataType::Fp16: - return prototype::Operand(_value[0][0], prototype::OperandType::ScalarFp16); - - default: - CKW_ASSERT(false); - } - } - else - { - return 
prototype::Operand(name()); - } - } - else - { - return prototype::Operand(name(), prototype::OperandType::Tile); - } -} - -const TileInfo &TileOperand::tile_info() const -{ - return _info; -} - -DataType TileOperand::data_type() const -{ - return _info.data_type(); -} - -bool TileOperand::is_constant() const -{ - return _constant; -} - -bool TileOperand::is_scalar() const -{ - return _info.width() == 1 && _info.height() == 1; -} - -std::string TileOperand::scalar_value() const -{ - CKW_ASSERT(is_scalar()); - CKW_ASSERT(is_constant()); - - return _value[0][0]; -} - -const TileContainer &TileOperand::value() const -{ - return _value; -} - -} // namespace ckw diff --git a/filelist.json b/filelist.json index 2f33b5cd5e..dcf3204ecd 100644 --- a/filelist.json +++ b/filelist.json @@ -2324,7 +2324,6 @@ "src/dynamic_fusion/sketch/attributes/ResizeAttributes.cpp", "src/dynamic_fusion/sketch/attributes/SoftmaxAttributes.cpp", "src/dynamic_fusion/sketch/attributes/ReshapeAttributes.cpp", - "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.cpp", "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp", "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp", "src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp", @@ -2339,8 +2338,6 @@ "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp", "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp", "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp", - "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp", - "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp", "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp", "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp", "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp", @@ -2361,21 +2358,6 @@ "src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp", 
"src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp" ], - "template_writer": [ - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp", - "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp" - ], "ckw_driver": [ "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp", "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp", diff --git a/scripts/clang_tidy_rules.py b/scripts/clang_tidy_rules.py index 1e1ab7f545..f244017dbd 100755 --- a/scripts/clang_tidy_rules.py +++ b/scripts/clang_tidy_rules.py @@ -1,7 +1,7 @@ #!/usr/bin/env python3 # -*- coding: utf-8 -*- # -# Copyright (c) 2017-2023 Arm Limited. +# Copyright (c) 2017-2024 Arm Limited. 
# # SPDX-License-Identifier: MIT # @@ -28,7 +28,7 @@ import re import sys def get_list_includes(): - return "compute_kernel_writer/prototype/include " \ + return "compute_kernel_writer/include " \ "src/cpu/kernels/assembly " \ "src/core/NEON/kernels/assembly " \ "src/core/NEON/kernels/convolution/winograd " \ @@ -43,8 +43,6 @@ def get_list_flags( filename, arch): flags.append("-DARM_COMPUTE_OPENCL_ENABLED") if arch == "aarch64": flags.append("-DARM_COMPUTE_AARCH64_V8_2") - if "ckw_driver" in filename: - flags.append("-DACL_INTERNAL_TEST_CKW_IN_DF") return flags diff --git a/scripts/generate_android_bp.py b/scripts/generate_android_bp.py index f7ecbc468b..6efd072acd 100755 --- a/scripts/generate_android_bp.py +++ b/scripts/generate_android_bp.py @@ -108,6 +108,7 @@ cc_library_static { proprietary: true, local_include_dirs: ["build/android-arm64v8a/src/core", "build/android-arm64v8a/src/core/CL", + "compute_kernel_writer/include", "src/core/common", "src/core/helpers", "src/core/NEON/kernels/arm_gemm", diff --git a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp index 9ca20fa152..eab5cddd07 100644 --- a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp +++ b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -26,12 +26,11 @@ #include "arm_compute/core/CL/ICLTensor.h" #include "src/core/CL/CLUtils.h" -#ifdef ACL_INTERNAL_TEST_CKW_IN_DF #include "src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.h" -#endif // ACL_INTERNAL_TEST_CKW_IN_DF #include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h" #include "src/gpu/cl/ClKernelLibrary.h" #include "support/Cast.h" + namespace arm_compute { namespace experimental @@ -61,128 +60,6 @@ void ClKernelRuntime::configure(const ClCompileContext &compile_ctx, const GpuKe _arguments = code.arguments(); } -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - -inline void ClKernelRuntime::add_tensor_argument(unsigned int &idx, - const GpuKernelArgumentInfo &arg, - const ICLTensor *tensor, - const Window &arg_slice, - std::vector<cl::Image2D> &cl_images) -{ - ARM_COMPUTE_ERROR_ON_NULLPTR(tensor); - - switch (arg.type) - { - case GpuKernelArgumentInfo::Type::Scalar: - { - ARM_COMPUTE_ERROR("Unsupported yet"); - break; - } - - case GpuKernelArgumentInfo::Type::Vector: - { - add_1D_tensor_argument(idx, tensor, arg_slice); - break; - } - - case GpuKernelArgumentInfo::Type::Image: - { - add_2D_tensor_argument(idx, tensor, arg_slice); - break; - } - case GpuKernelArgumentInfo::Type::Image_Reinterpret_As_3D: - { - add_2D_tensor_argument(idx, tensor, arg_slice); - const unsigned int total_cross_plane_pad = tensor->info()->padding().top + tensor->info()->padding().bottom; - _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(total_cross_plane_pad)); - break; - } - case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D: - { - const TensorShape shape2d(tensor->info()->dimension(0) / 4, tensor->info()->dimension(1) * - tensor->info()->dimension(2) * - tensor->info()->dimension(3)); - const size_t image_row_pitch = tensor->info()->strides_in_bytes()[1]; - cl::Image2D tensor_image2d = - create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(), shape2d, - 
tensor->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly); - cl_images.push_back(tensor_image2d); - _kernel.setArg(idx++, tensor_image2d); - break; - } - - case GpuKernelArgumentInfo::Type::Image_3D: - { - add_2D_tensor_argument(idx, tensor, arg_slice); - _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(tensor->info()->strides_in_bytes()[2])); - break; - } - case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D: - { - const TensorShape shape2d(tensor->info()->dimension(0) / 4, tensor->info()->dimension(1) * - tensor->info()->dimension(2) * - tensor->info()->dimension(3)); - const size_t image_row_pitch = tensor->info()->strides_in_bytes()[1]; - cl::Image2D tensor_image2d = - create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(), shape2d, - tensor->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly); - cl_images.push_back(tensor_image2d); - _kernel.setArg(idx++, tensor_image2d); - _kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(tensor->info()->strides_in_bytes()[2])); - break; - } - - case GpuKernelArgumentInfo::Type::Tensor_3D: - { - add_3D_tensor_argument(idx, tensor, arg_slice); - break; - } - - case GpuKernelArgumentInfo::Type::Tensor_4D: - { - add_4D_tensor_argument(idx, tensor, arg_slice); - break; - } - case GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer: - { - add_4d_tensor_nhwc_argument(idx, tensor); - break; - } - case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image: - { - const size_t image_w = tensor->info()->dimension(0) / 4; - const size_t image_h = tensor->info()->tensor_shape().total_size_upper(1); - const size_t image_stride_y = tensor->info()->strides_in_bytes()[1]; - - cl::Image2D tensor_image2d = create_image2d_from_buffer( - CLKernelLibrary::get().context(), tensor->cl_buffer(), TensorShape(image_w, image_h), - tensor->info()->data_type(), image_stride_y, CLImage2DType::ReadOnly); - cl_images.push_back(tensor_image2d); - - _kernel.setArg(idx++, tensor_image2d); - 
add_4d_tensor_nhwc_argument(idx, tensor); - break; - } - case GpuKernelArgumentInfo::Type::Tensor_Special_0: - { - const ITensorInfo *info = tensor->info(); - const Strides &strides = info->strides_in_bytes(); - - _kernel.setArg(idx++, tensor->cl_buffer()); - const size_t dim1xdim2 = info->tensor_shape()[1] * info->tensor_shape()[2]; - _kernel.setArg<cl_int>(idx++, static_cast<int32_t>(dim1xdim2)); - const size_t stride1 = strides[1]; - _kernel.setArg<cl_int>(idx++, static_cast<int32_t>(stride1)); - break; - } - default: - { - ARM_COMPUTE_ERROR("Unsupported"); - } - } -} - -#else // ACL_INTERNAL_TEST_CKW_IN_DF inline void ClKernelRuntime::add_kernel_argument(unsigned int &idx, const GpuKernelArgumentBinding &arg, const ICLTensor *tensor, @@ -234,7 +111,6 @@ inline void ClKernelRuntime::add_kernel_argument(unsigned int } } -#endif // ACL_INTERNAL_TEST_CKW_IN_DF void ClKernelRuntime::run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) { ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); @@ -253,17 +129,7 @@ void ClKernelRuntime::run_op(ITensorPack &tensors, const Window &window, cl::Com // Set kernel arguments // CLImages created from tensor arguments. 
Need to be retained until enqueue std::vector<cl::Image2D> cl_images; -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - for (auto id_arg : _arguments) - { - const auto arg = id_arg.second; - auto tensor = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(id_arg.first)); - ARM_COMPUTE_ERROR_ON_NULLPTR(tensor); - ARM_COMPUTE_ERROR_ON_NULLPTR(tensor->info()); - add_tensor_argument(idx, *arg.kernel_argument_info(), tensor, slice, cl_images); - } -#else // ACL_INTERNAL_TEST_CKW_IN_DF for (const auto &arg : _arguments) { auto tensor = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(arg.id())); @@ -271,7 +137,6 @@ void ClKernelRuntime::run_op(ITensorPack &tensors, const Window &window, cl::Com ARM_COMPUTE_ERROR_ON_NULLPTR(tensor->info()); add_kernel_argument(idx, arg, tensor, cl_images); } -#endif // ACL_INTERNAL_TEST_CKW_IN_DF // Dispatch kernel enqueue(queue, *this, slice, lws_hint(), use_dummy_work_items); diff --git a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h index e78567eb9d..148e4db581 100644 --- a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h +++ b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef SRC_DYNAMIC_FUSION_RUNTIME_GPU_CL_CLKERNELRUNTIME -#define SRC_DYNAMIC_FUSION_RUNTIME_GPU_CL_CLKERNELRUNTIME +#ifndef ACL_SRC_DYNAMIC_FUSION_RUNTIME_GPU_CL_CLKERNELRUNTIME_H +#define ACL_SRC_DYNAMIC_FUSION_RUNTIME_GPU_CL_CLKERNELRUNTIME_H #include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" #include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h" @@ -59,21 +59,6 @@ public: virtual void run_op(ITensorPack &tensors, const Window &window, cl::CommandQueue &queue) override; private: -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - /** Set a kernel tensor argument - * - * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. - * @param[in] arg Kernel argument descriptor accompanying @p tensor - * @param[in] tensor Tensor to set as an argument of the object's kernel - * @param[in] arg_slice Window the kernel will be run on - * @param[out] cl_images Extra cl images created from the tensor (will need to be retained until the kernel is enqueued) - */ - inline void add_tensor_argument(unsigned int &idx, - const GpuKernelArgumentInfo &arg, - const ICLTensor *tensor, - const Window &arg_slice, - std::vector<cl::Image2D> &cl_images); -#else // ACL_INTERNAL_TEST_CKW_IN_DF /** Set a kernel argument as part of a tensor * * @param[in,out] idx Index at which to start adding the tensor's arguments. Will be incremented by the number of kernel arguments set. 
@@ -85,7 +70,6 @@ private: const GpuKernelArgumentBinding &arg, const ICLTensor *tensor, std::vector<cl::Image2D> &cl_images); -#endif // ACL_INTERNAL_TEST_CKW_IN_DF private: GpuKernelArgumentList _arguments{}; @@ -94,4 +78,4 @@ private: } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_RUNTIME_GPU_CL_CLKERNELRUNTIME */ +#endif // ACL_SRC_DYNAMIC_FUSION_RUNTIME_GPU_CL_CLKERNELRUNTIME_H diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.cpp deleted file mode 100644 index 9cecfc2ffd..0000000000 --- a/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.cpp +++ /dev/null @@ -1,37 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -bool operator==(const GpuKernelArgumentInfo &info0, const GpuKernelArgumentInfo &info1) -{ - return info0.type == info1.type; -} -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h b/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h index 03817173f4..c923bf9c16 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h +++ b/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELARGUMENT -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELARGUMENT +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELARGUMENT_H +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELARGUMENT_H #include "arm_compute/core/TensorInfo.h" @@ -32,96 +32,6 @@ namespace experimental { namespace dynamic_fusion { -/** Contain information required to set up a kernel argument at run time - * @deprecated To be removed along with ClTemplateWriter - */ -struct GpuKernelArgumentInfo -{ - /** Enumerate all the tensor arguments variants used by all kernel implementations. 
*/ - enum class Type : int - { - Scalar, - - Vector, - - Image, - Image_Reinterpret_As_3D, - Image_Export_To_ClImage2D, - - Image_3D, // 3D Tensor represented as a 2D Image + stride_z - Image_3D_Export_To_ClImage2D, - - Tensor_3D, - Tensor_4D, - Tensor_4D_t_Buffer, - Tensor_4D_t_Image, - - Tensor_Special_0, - }; - /** Default constructor */ - GpuKernelArgumentInfo() = default; - /** Constructor */ - GpuKernelArgumentInfo(Type type) : type{type} - { - } - Type type{Type::Tensor_4D_t_Buffer}; -}; -bool operator==(const GpuKernelArgumentInfo &info0, const GpuKernelArgumentInfo &info1); -/** Kernel argument information linked with its corresponding @ref ITensorInfo - * @deprecated To be removed along with ClTemplateWriter - */ -class GpuKernelArgument -{ -public: - /** Constructor - * - * @param[in] tensor_info Associated @ref ITensorInfo - * @param[in] kernel_arg_info Associated @ref GpuKernelArgumentInfo - */ - GpuKernelArgument(const ITensorInfo &tensor_info, const GpuKernelArgumentInfo &kernel_arg_info) - : _tensor_info{tensor_info}, _kernel_arg_info{kernel_arg_info} - { - } - /** Get workload tensor id */ - ITensorInfo::Id id() const - { - return _tensor_info.id(); - } - /** Get associated @ref ITensorInfo */ - ITensorInfo *tensor_info() - { - return &_tensor_info; - } - /** Get associated @ref ITensorInfo */ - const ITensorInfo *tensor_info() const - { - return &_tensor_info; - } - /** Get associated @ref GpuKernelArgumentInfo */ - GpuKernelArgumentInfo *kernel_argument_info() - { - return &_kernel_arg_info; - } - /** Get associated @ref GpuKernelArgumentInfo */ - const GpuKernelArgumentInfo *kernel_argument_info() const - { - return &_kernel_arg_info; - } - /** Check if the associated workload tensor has valid id - * - * @return true if has valid id - * @return false otherwise - */ - bool has_valid_id() const - { - return _tensor_info.has_valid_id(); - } - -private: - TensorInfo _tensor_info{}; - GpuKernelArgumentInfo _kernel_arg_info{}; -}; -#ifdef 
ACL_INTERNAL_TEST_CKW_IN_DF /** Describe how the tensor runtime memory can be accessed * * Please see documentation under @ref GpuKernelArgumentBinding @@ -243,9 +153,8 @@ private: }; Value _value; }; -#endif // ACL_INTERNAL_TEST_CKW_IN_DF } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELARGUMENT */ +#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELARGUMENT_H diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h b/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h index 24812cd8a7..11d916eec9 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h +++ b/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,19 +21,15 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE -#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE_H +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE_H #include "arm_compute/core/CL/CLCompileContext.h" #include "arm_compute/core/Window.h" #include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -#include <map> -#else // ACL_INTERNAL_TEST_CKW_IN_DF #include <deque> -#endif // ACL_INTERNAL_TEST_CKW_IN_DF #include <string> namespace arm_compute @@ -43,11 +39,7 @@ namespace experimental namespace dynamic_fusion { /** The argument list of a @ref GpuKernelSourceCode */ -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -using GpuKernelArgumentList = std::map<ITensorInfo::Id, GpuKernelArgument>; -#else // ACL_INTERNAL_TEST_CKW_IN_DF using GpuKernelArgumentList = std::deque<GpuKernelArgumentBinding>; -#endif // ACL_INTERNAL_TEST_CKW_IN_DF /** Container of kernel code to be compiled and run 
in a @ref GpuUnitWorkload */ @@ -132,4 +124,4 @@ private: } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE */ +#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELSOURCECODE_H diff --git a/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp index 502ceab807..725a46e91c 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp +++ b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,14 +26,10 @@ #include "arm_compute/core/experimental/Types.h" #include "src/dynamic_fusion/sketch/ArgumentPack.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h" #include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h" #include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h" #include "src/dynamic_fusion/sketch/gpu/GpuComponentServices.h" -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h" -#else // ACL_INTERNAL_TEST_CKW_IN_DF -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h" -#endif // ACL_INTERNAL_TEST_CKW_IN_DF namespace arm_compute { @@ -41,8 +37,8 @@ namespace experimental { namespace dynamic_fusion { -GpuLogicalKernel::GpuLogicalKernel(GpuComponentServices *services, const GpuKernelComponentGroup &components) - : _comp_group{components}, _store_components{} +GpuLogicalKernel::GpuLogicalKernel(GpuComponentServices *services, GpuKernelComponentGroup components) // NOLINT + : _comp_group{std::move(components)}, _store_components{} { ARM_COMPUTE_UNUSED(services); } @@ -50,19 +46,11 @@ GpuLogicalKernel::GpuLogicalKernel(GpuComponentServices *services, const GpuKern GpuKernelSourceCode GpuLogicalKernel::write_kernel_code() { GpuKernelSourceCode code; 
-#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - ClTemplateWriter writer{_comp_group}; -#else // ACL_INTERNAL_TEST_CKW_IN_DF - GpuCkwDriver writer{_comp_group}; -#endif // ACL_INTERNAL_TEST_CKW_IN_DF + GpuCkwDriver writer{_comp_group}; code.name(writer.get_name()); code.code(writer.get_code()); -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - code.arguments(writer.get_tensors()); -#else // ACL_INTERNAL_TEST_CKW_IN_DF code.arguments(writer.get_kernel_arguments()); -#endif // ACL_INTERNAL_TEST_CKW_IN_DF code.build_options(writer.get_build_options()); code.config_id(writer.get_config_id()); code.window(writer.get_window()); diff --git a/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.h b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.h index 1fd40f0acd..e2bc83b286 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.h +++ b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022 Arm Limited. + * Copyright (c) 2022, 2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPULOGICALKERNEL -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPULOGICALKERNEL +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPULOGICALKERNEL_H +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPULOGICALKERNEL_H #include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" #include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h" @@ -52,7 +52,7 @@ public: * @param[in] services @ref GpuComponentServices to be used * @param[in] components Component group from which this logical kernel is initialized */ - explicit GpuLogicalKernel(GpuComponentServices *services, const GpuKernelComponentGroup &components); + explicit GpuLogicalKernel(GpuComponentServices *services, GpuKernelComponentGroup components); // NOLINT /** Allow instances of this class to be copy constructed */ GpuLogicalKernel(const GpuLogicalKernel &) = default; /** Allow instances of this class to be copied */ @@ -71,4 +71,4 @@ private: } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPULOGICALKERNEL */ +#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPULOGICALKERNEL_H diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h index 43bcc47fa0..5d75bcaaa0 100644 --- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h +++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE_H +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE_H #include "arm_compute/core/experimental/Types.h" #include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h" @@ -36,7 +36,6 @@ namespace experimental { namespace dynamic_fusion { -#ifdef ACL_INTERNAL_TEST_CKW_IN_DF namespace { /** Extract kernel arguments of one tensor from a flat list of kernel arguments. @@ -70,7 +69,6 @@ GpuKernelArgumentList extract_kernel_args_for_one_tensor(GpuKernelArgumentList & return tensor_kargs; } } // namespace -#endif // ACL_INTERNAL_TEST_CKW_IN_DF /** Uniquely identifies a @ref GpuUnitWorkload within a @ref GpuWorkloadSourceCode */ using UnitWorkloadId = int32_t; @@ -83,25 +81,11 @@ class GpuWorkloadArgument public: /** Default constructor */ GpuWorkloadArgument() = default; -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF /** Constructor * - * @param[in] tensor_info @ref ITensorInfo of the workload argument - * @param[in] mem_desc @ref MemoryDescriptor of the workload argument - * @param[in] kernel_arg_info @ref GpuKernelArgumentInfo of the workload argument - */ - GpuWorkloadArgument(const ITensorInfo &tensor_info, - const MemoryDescriptor &mem_desc, - const GpuKernelArgumentInfo &kernel_arg_info) - : _tensor_info{tensor_info}, _mem_desc{mem_desc}, _kernel_arg_info{kernel_arg_info} - { - } -#else // ACL_INTERNAL_TEST_CKW_IN_DF - /** Constructor - * - * @param[in] tensor_info @ref ITensorInfo of the workload argument - * @param[in] mem_desc @ref MemoryDescriptor of the workload argument - * @param[in] kernel_arg_list @ref GpuKernelArgumentList of the workload argument + * @param[in] tensor_info @ref ITensorInfo of the workload argument + * @param[in] mem_desc @ref MemoryDescriptor of the workload argument + * @param[in] kernel_args @ref GpuKernelArgumentList of the workload argument */ 
GpuWorkloadArgument(const ITensorInfo &tensor_info, const MemoryDescriptor &mem_desc, @@ -109,7 +93,6 @@ public: : _tensor_info{tensor_info}, _mem_desc{mem_desc}, _kernel_args{kernel_args} { } -#endif // ACL_INTERNAL_TEST_CKW_IN_DF /** Get tensor id within workload */ ITensorInfo::Id id() const { @@ -135,18 +118,6 @@ public: { return &_mem_desc; } -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - /** Get @ref GpuKernelArgumentInfo of the argument */ - GpuKernelArgumentInfo *kernel_argument_info() - { - return &_kernel_arg_info; - } - /** Get @ref GpuKernelArgumentInfo of the argument */ - const GpuKernelArgumentInfo *kernel_argument_info() const - { - return &_kernel_arg_info; - } -#else // ACL_INTERNAL_TEST_CKW_IN_DF /** Get @ref GpuKernelArgumentList of the workload tensor */ GpuKernelArgumentList *kernel_argument_list() { @@ -157,7 +128,6 @@ public: { return &_kernel_args; } -#endif // ACL_INTERNAL_TEST_CKW_IN_DF /** Check if the workload argument has valid id * * @return true If has valid id @@ -169,13 +139,9 @@ public: } private: - TensorInfo _tensor_info{}; - MemoryDescriptor _mem_desc{}; -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - GpuKernelArgumentInfo _kernel_arg_info{}; -#else // ACL_INTERNAL_TEST_CKW_IN_DF + TensorInfo _tensor_info{}; + MemoryDescriptor _mem_desc{}; GpuKernelArgumentList _kernel_args{}; -#endif // ACL_INTERNAL_TEST_CKW_IN_DF }; /** Describes when a unit workload is run. 
@@ -259,22 +225,7 @@ public: const auto uwk_id = static_cast<UnitWorkloadId>(_unit_workloads.size()); const auto unit_work = GpuUnitWorkload(uwk_id, kernel_code, stage); _unit_workloads.push_back(unit_work); -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - ARM_COMPUTE_UNUSED(context); - // Assemble kernel argument with memory descriptor to form workload argument - for (const auto &id_arg : kernel_code.arguments()) - { - const auto arg_id = id_arg.first; - const auto arg = id_arg.second; - _workload_arguments[arg_id] = - GpuWorkloadArgument{*arg.tensor_info(), mem_map.at(arg_id), *arg.kernel_argument_info()}; - if (_tensor_uwork_map.find(arg_id) == _tensor_uwork_map.end()) - { - _tensor_uwork_map[arg_id] = std::set<UnitWorkloadId>(); - } - _tensor_uwork_map[arg_id].insert(uwk_id); - } -#else // ACL_INTERNAL_TEST_CKW_IN_DF + GpuKernelArgumentList flat_kernel_args = kernel_code.arguments(); GpuKernelArgumentList tensor_kargs{}; while (true) @@ -296,7 +247,7 @@ public: _tensor_uwork_map[tensor_id].insert(uwk_id); } } -#endif // ACL_INTERNAL_TEST_CKW_IN_DF + return uwk_id; } /** Get a unit workload from its id */ @@ -346,4 +297,4 @@ private: } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE */ +#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSOURCECODE_H diff --git a/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h b/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h index ad474674f9..84972501de 100644 --- a/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h +++ b/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_IGPUKERNELWRITER -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_IGPUKERNELWRITER +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_IGPUKERNELWRITER_H +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_IGPUKERNELWRITER_H #include "arm_compute/core/CL/CLCompileContext.h" #include "arm_compute/core/Window.h" @@ -62,23 +62,14 @@ public: virtual std::string get_config_id() = 0; /** Generate execution window */ virtual Window get_window() const = 0; - /** Get the kernel argument lists of the kernel - * @deprecated To be removed along with ClTemplateWriter - */ - virtual std::map<ITensorInfo::Id, GpuKernelArgument> get_tensors() - { - return {}; - } -#ifdef ACL_INTERNAL_TEST_CKW_IN_DF /** Get the flat list of arguments of the kernel*/ virtual GpuKernelArgumentList get_kernel_arguments() { return {}; } -#endif // ACL_INTERNAL_TEST_CKW_IN_DF }; } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_IGPUKERNELWRITER */ +#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_IGPUKERNELWRITER_H diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h index b80ce0d816..f8770920b7 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h @@ -24,15 +24,12 @@ #ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWDRIVER_H #define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWDRIVER_H -#include "ckw/Kernel.h" - #include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" #include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" #include "src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h" #include "compute_kernel_writer/include/ckw/Kernel.h" #include "compute_kernel_writer/include/ckw/KernelArgument.h" -#include <map> #include <string> namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h 
b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h index f1f0e6747b..c9ce7eb269 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWSTORE -#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWSTORE +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWSTORE_H +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWSTORE_H #include "src/core/common/Macros.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h" @@ -33,8 +33,6 @@ namespace experimental { namespace dynamic_fusion { -/** An interface used by @ref ClTemplateWriter to write source code for a kernel component - */ class GpuCkwStore : public IGpuCkwComponentDriver { public: @@ -61,4 +59,4 @@ private: } // namespace experimental } // namespace arm_compute -#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWSTORE */ +#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_GPUCKWSTORE_H diff --git a/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h b/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h index 4b8eea2f57..6678c929e9 100644 --- a/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h +++ b/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_IGPUKERNELCOMPONENT -#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_IGPUKERNELCOMPONENT +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_IGPUKERNELCOMPONENT_H +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_IGPUKERNELCOMPONENT_H #include "src/dynamic_fusion/sketch/ArgumentPack.h" #include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h" @@ -100,10 +100,6 @@ public: return _properties; } /** Get writer for the component */ - virtual const IGpuTemplateComponentWriter *template_writer() const - { - return nullptr; - } virtual const IGpuCkwComponentDriver *ckw_component_driver() const { return nullptr; @@ -119,4 +115,4 @@ private: } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_IGPUKERNELCOMPONENT */ +#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_IGPUKERNELCOMPONENT_H diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp index fdf528a65d..e316bdf46d 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -24,11 +24,7 @@ #include "ClComponentActivation.h" #include "src/core/CL/CLValidate.h" -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h" -#else //ACL_INTERNAL_TEST_CKW_IN_DF #include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.h" -#endif //ACL_INTERNAL_TEST_CKW_IN_DF namespace arm_compute { @@ -69,11 +65,7 @@ ClComponentActivation::ClComponentActivation(ComponentId const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes) : IGpuKernelComponent{id, properties, tensors}, -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - _component_writer{std::make_unique<ClTemplateActivation>(id, tensors, attributes)} -#else //ACL_INTERNAL_TEST_CKW_IN_DF _component_writer{std::make_unique<GpuCkwActivation>(id, tensors, attributes)} -#endif //ACL_INTERNAL_TEST_CKW_IN_DF { } @@ -81,11 +73,7 @@ ClComponentActivation::~ClComponentActivation() { } -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -const IGpuTemplateComponentWriter *ClComponentActivation::template_writer() const -#else //ACL_INTERNAL_TEST_CKW_IN_DF const IGpuCkwComponentDriver *ClComponentActivation::ckw_component_driver() const -#endif //ACL_INTERNAL_TEST_CKW_IN_DF { return _component_writer.get(); } diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h index 02c854356a..b8185158f3 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTACTIVATION -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTACTIVATION +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTACTIVATION_H +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTACTIVATION_H #include "arm_compute/function_info/ActivationLayerInfo.h" @@ -41,11 +41,7 @@ template <typename T> class ArgumentPack; /** Forward declaration */ -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -class ClTemplateActivation; -#else //ACL_INTERNAL_TEST_CKW_IN_DF class GpuCkwActivation; -#endif //ACL_INTERNAL_TEST_CKW_IN_DF class ClComponentActivation final : public IGpuKernelComponent { @@ -106,11 +102,7 @@ public: ClComponentActivation &operator=(ClComponentActivation &&component) = default; /** Get writer for the component */ -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - const IGpuTemplateComponentWriter *template_writer() const override; -#else //ACL_INTERNAL_TEST_CKW_IN_DF - const IGpuCkwComponentDriver *ckw_component_driver() const override; -#endif //ACL_INTERNAL_TEST_CKW_IN_DF + const IGpuCkwComponentDriver *ckw_component_driver() const override; /** Get component type */ GpuComponentType type() const override @@ -119,13 +111,9 @@ public: } private: -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - std::unique_ptr<ClTemplateActivation> _component_writer; -#else //ACL_INTERNAL_TEST_CKW_IN_DF std::unique_ptr<GpuCkwActivation> _component_writer; -#endif //ACL_INTERNAL_TEST_CKW_IN_DF }; } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTACTIVATION */ +#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTACTIVATION_H diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp index b1636795a3..e1850d78c4 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp +++ 
b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -27,11 +27,7 @@ #include "src/core/CL/CLValidate.h" #include "src/dynamic_fusion/sketch/ArgumentPack.h" -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.h" -#else //ACL_INTERNAL_TEST_CKW_IN_DF #include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h" -#endif //ACL_INTERNAL_TEST_CKW_IN_DF namespace arm_compute { @@ -72,22 +68,16 @@ ClComponentCast::ClComponentCast(ComponentId id, const Attributes &attributes, const Settings &settings) : IGpuKernelComponent{id, properties, tensors}, -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - _component_writer{std::make_unique<ClTemplateCast>(id, tensors, attributes)} -#else //ACL_INTERNAL_TEST_CKW_IN_DF _component_writer{std::make_unique<GpuCkwCast>(id, tensors, attributes)} -#endif //ACL_INTERNAL_TEST_CKW_IN_DF { ARM_COMPUTE_UNUSED(attributes, settings); } + ClComponentCast::~ClComponentCast() { } -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -const IGpuTemplateComponentWriter *ClComponentCast::template_writer() const -#else //ACL_INTERNAL_TEST_CKW_IN_DF + const IGpuCkwComponentDriver *ClComponentCast::ckw_component_driver() const -#endif //ACL_INTERNAL_TEST_CKW_IN_DF { return _component_writer.get(); } diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h index ed77b1203b..201dacc288 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTCAST -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTCAST +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTCAST_H +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTCAST_H #include "arm_compute/dynamic_fusion/sketch/attributes/CastAttributes.h" @@ -49,11 +49,7 @@ private: }; /** Forward declaration */ -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -class ClTemplateCast; -#else //ACL_INTERNAL_TEST_CKW_IN_DF class GpuCkwCast; -#endif //ACL_INTERNAL_TEST_CKW_IN_DF class ClComponentCast final : public IGpuKernelComponent { @@ -120,11 +116,7 @@ public: /** Allow instances of this class to be moved */ ClComponentCast &operator=(ClComponentCast &&component) = default; /** Get writer for the component */ -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - const IGpuTemplateComponentWriter *template_writer() const override; -#else //ACL_INTERNAL_TEST_CKW_IN_DF const IGpuCkwComponentDriver *ckw_component_driver() const override; -#endif //ACL_INTERNAL_TEST_CKW_IN_DF /** Get component type */ GpuComponentType type() const override { @@ -132,14 +124,10 @@ public: } private: -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - std::unique_ptr<ClTemplateCast> _component_writer; -#else //ACL_INTERNAL_TEST_CKW_IN_DF - std::unique_ptr<GpuCkwCast> _component_writer; -#endif //ACL_INTERNAL_TEST_CKW_IN_DF + std::unique_ptr<GpuCkwCast> _component_writer; }; } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTCAST */ +#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTCAST_H diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp index ca8037c393..7cd23d6115 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp +++ 
b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -28,11 +28,7 @@ #include "arm_compute/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.h" #include "src/core/CL/CLValidate.h" -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h" -#else //ACL_INTERNAL_TEST_CKW_IN_DF #include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDepthwiseConv2d.h" -#endif //ACL_INTERNAL_TEST_CKW_IN_DF namespace arm_compute { @@ -212,22 +208,14 @@ ClComponentDepthwiseConv2d::ClComponentDepthwiseConv2d(ComponentId const Attributes &attributes, const Settings &settings) : IGpuKernelComponent{id, properties, tensors}, -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - _component_writer{std::make_unique<ClTemplateDepthwiseConv2d>(id, tensors, attributes, settings)} -#else //ACL_INTERNAL_TEST_CKW_IN_DF _component_writer{std::make_unique<GpuCkwDepthwiseConv2d>(id, tensors, attributes, settings)} -#endif //ACL_INTERNAL_TEST_CKW_IN_DF { ARM_COMPUTE_UNUSED(attributes, settings); } ClComponentDepthwiseConv2d::~ClComponentDepthwiseConv2d() { } -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -const IGpuTemplateComponentWriter *ClComponentDepthwiseConv2d::template_writer() const -#else //ACL_INTERNAL_TEST_CKW_IN_DF const IGpuCkwComponentDriver *ClComponentDepthwiseConv2d::ckw_component_driver() const -#endif //ACL_INTERNAL_TEST_CKW_IN_DF { return _component_writer.get(); } diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h index 01168e9ded..7526361f1c 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm 
Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -44,11 +44,7 @@ class ArgumentPack; class DepthwiseConv2dAttributes; /** Forward declaration */ -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -class ClTemplateDepthwiseConv2d; -#else //ACL_INTERNAL_TEST_CKW_IN_DF class GpuCkwDepthwiseConv2d; -#endif //ACL_INTERNAL_TEST_CKW_IN_DF /** Component specific settings */ @@ -161,13 +157,8 @@ public: ClComponentDepthwiseConv2d(ClComponentDepthwiseConv2d &&component) = default; /** Allow instances of this class to be moved */ ClComponentDepthwiseConv2d &operator=(ClComponentDepthwiseConv2d &&component) = default; - /** Get template writer for the component */ -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - const IGpuTemplateComponentWriter *template_writer() const override; -#else //ACL_INTERNAL_TEST_CKW_IN_DF - const IGpuCkwComponentDriver *ckw_component_driver() const override; -#endif //ACL_INTERNAL_TEST_CKW_IN_DF - + /** Get writer for the component */ + const IGpuCkwComponentDriver *ckw_component_driver() const override; /** Get component type */ GpuComponentType type() const override { @@ -175,11 +166,7 @@ public: } private: -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - std::unique_ptr<ClTemplateDepthwiseConv2d> _component_writer; -#else //ACL_INTERNAL_TEST_CKW_IN_DF std::unique_ptr<GpuCkwDepthwiseConv2d> _component_writer; -#endif //ACL_INTERNAL_TEST_CKW_IN_DF }; } // namespace dynamic_fusion } // namespace experimental diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp index 98f3d6a882..783a17df30 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -28,12 +28,7 @@ #include "arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h" #include "src/core/CL/CLValidate.h" - -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h" -#else // ACL_INTERNAL_TEST_CKW_IN_DF #include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.h" -#endif // ACL_INTERNAL_TEST_CKW_IN_DF namespace arm_compute { @@ -153,11 +148,7 @@ ClComponentDirectConv2d::ClComponentDirectConv2d(ComponentId const Attributes &attributes, const Settings &settings) : IGpuKernelComponent{id, properties, tensors}, -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - _component_writer{std::make_unique<ClTemplateDirectConv2d>(id, tensors, attributes, settings)} -#else // ACL_INTERNAL_TEST_CKW_IN_DF _component_writer{std::make_unique<GpuCkwDirectConv2d>(id, tensors, attributes, settings)} -#endif // ACL_INTERNAL_TEST_CKW_IN_DF { } @@ -165,11 +156,7 @@ ClComponentDirectConv2d::~ClComponentDirectConv2d() { } -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -const IGpuTemplateComponentWriter *ClComponentDirectConv2d::template_writer() const -#else // ACL_INTERNAL_TEST_CKW_IN_DF const IGpuCkwComponentDriver *ClComponentDirectConv2d::ckw_component_driver() const -#endif // ACL_INTERNAL_TEST_CKW_IN_DF { return _component_writer.get(); } diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h index d6d9705d3c..c50b0fa0ce 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTDIRECTCONV2D -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTDIRECTCONV2D +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTDIRECTCONV2D_H +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTDIRECTCONV2D_H #include "arm_compute/core/Error.h" #include "arm_compute/core/KernelDescriptors.h" @@ -68,11 +68,7 @@ private: }; /** Forward declaration */ -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -class ClTemplateDirectConv2d; -#else // ACL_INTERNAL_TEST_CKW_IN_DF class GpuCkwDirectConv2d; -#endif // ACL_INTERNAL_TEST_CKW_IN_DF class ClComponentDirectConv2d final : public IGpuKernelComponent { @@ -139,11 +135,7 @@ public: /** Allow instances of this class to be moved */ ClComponentDirectConv2d &operator=(ClComponentDirectConv2d &&component) = default; /** Get writer for the component */ -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - const IGpuTemplateComponentWriter *template_writer() const override; -#else // ACL_INTERNAL_TEST_CKW_IN_DF - const IGpuCkwComponentDriver *ckw_component_driver() const override; -#endif // ACL_INTERNAL_TEST_CKW_IN_DF + const IGpuCkwComponentDriver *ckw_component_driver() const override; /** Get component type */ GpuComponentType type() const override { @@ -151,13 +143,9 @@ public: } private: -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - std::unique_ptr<ClTemplateDirectConv2d> _component_writer; -#else // ACL_INTERNAL_TEST_CKW_IN_DF std::unique_ptr<GpuCkwDirectConv2d> _component_writer; -#endif // ACL_INTERNAL_TEST_CKW_IN_DF }; } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTDIRECTCONV2D */ +#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTDIRECTCONV2D_H diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp index 
5b136427e4..209c73dbee 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -26,11 +26,7 @@ #include "arm_compute/core/Validate.h" #include "src/core/CL/CLValidate.h" -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h" -#else //ACL_INTERNAL_TEST_CKW_IN_DF #include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h" -#endif //ACL_INTERNAL_TEST_CKW_IN_DF namespace arm_compute { @@ -117,19 +113,11 @@ ClComponentElementwiseBinary::ClComponentElementwiseBinary(ComponentId const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes) : IGpuKernelComponent{id, properties, tensors}, -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - _component_writer{std::make_unique<ClTemplateElementwiseBinary>(id, tensors, attributes)} -#else //ACL_INTERNAL_TEST_CKW_IN_DF _component_writer{std::make_unique<GpuCkwElementwiseBinary>(id, tensors, attributes)} -#endif //ACL_INTERNAL_TEST_CKW_IN_DF { } -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -const IGpuTemplateComponentWriter *ClComponentElementwiseBinary::template_writer() const -#else //ACL_INTERNAL_TEST_CKW_IN_DF const IGpuCkwComponentDriver *ClComponentElementwiseBinary::ckw_component_driver() const -#endif //ACL_INTERNAL_TEST_CKW_IN_DF { return _component_writer.get(); } diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h index 7589b9732c..a4395a6219 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. 
+ * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY -#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY_H +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY_H #include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h" #include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h" @@ -40,11 +40,7 @@ template <typename T> class ArgumentPack; /** Forward declaration */ -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -class ClTemplateElementwiseBinary; -#else //ACL_INTERNAL_TEST_CKW_IN_DF class GpuCkwElementwiseBinary; -#endif //ACL_INTERNAL_TEST_CKW_IN_DF class ClComponentElementwiseBinary final : public IGpuKernelComponent { @@ -105,12 +101,7 @@ public: /** Allow instances of this class to be moved */ ClComponentElementwiseBinary &operator=(ClComponentElementwiseBinary &&component) = default; /** Get writer for the component */ -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - const IGpuTemplateComponentWriter *template_writer() const override; -#else //ACL_INTERNAL_TEST_CKW_IN_DF - const IGpuCkwComponentDriver *ckw_component_driver() const override; -#endif //ACL_INTERNAL_TEST_CKW_IN_DF - + const IGpuCkwComponentDriver *ckw_component_driver() const override; /** Get component type */ GpuComponentType type() const override { @@ -118,13 +109,9 @@ public: } private: -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - std::unique_ptr<ClTemplateElementwiseBinary> _component_writer; -#else //ACL_INTERNAL_TEST_CKW_IN_DF std::unique_ptr<GpuCkwElementwiseBinary> _component_writer; -#endif //ACL_INTERNAL_TEST_CKW_IN_DF }; } // namespace dynamic_fusion } // namespace experimental } // namespace 
arm_compute -#endif /* ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY */ +#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTELEMENTWISEBINARY_H diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp deleted file mode 100644 index 27c13bd654..0000000000 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp +++ /dev/null @@ -1,93 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/dynamic_fusion/sketch/attributes/SoftmaxAttributes.h" - -#include "src/core/CL/CLValidate.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -Status ClComponentLogits1DMaxShiftExpSum::validate(const Properties &properties, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes) -{ - ARM_COMPUTE_UNUSED(properties, attributes); - - const ITensorInfo *src = tensors.get_const_tensor(TensorType::ACL_SRC_0); - const ITensorInfo *sum = tensors.get_const_tensor(TensorType::ACL_DST_0); - const ITensorInfo *dst = tensors.get_const_tensor(TensorType::ACL_DST_1); - - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(sum); - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(dst); - - // 1. Check validity - // All tensor infos are initialized - ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().total_size() == 0); - ARM_COMPUTE_RETURN_ERROR_ON(sum->tensor_shape().total_size() == 0); - ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0); - - // Check for mismatches in shapes and data types - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst, sum); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(src, dst); - - // Device requirements are met - ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src); - - // 2. 
Check support level - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32); - - return Status{}; -} - -ClComponentLogits1DMaxShiftExpSum::ClComponentLogits1DMaxShiftExpSum(ComponentId id, - const Properties &properties, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes) - : IGpuKernelComponent{id, properties, tensors}, - _component_writer{std::make_unique<ClTemplateLogits1DMaxShiftExpSum>(id, tensors, attributes)} -{ -} - -ClComponentLogits1DMaxShiftExpSum::~ClComponentLogits1DMaxShiftExpSum() -{ -} - -const IGpuTemplateComponentWriter *ClComponentLogits1DMaxShiftExpSum::template_writer() const -{ - return _component_writer.get(); -} -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h deleted file mode 100644 index 91ab5de3b5..0000000000 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h +++ /dev/null @@ -1,130 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTLOGITS1DMAXSHIFTEXPSUM -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTLOGITS1DMAXSHIFTEXPSUM - -#include "arm_compute/dynamic_fusion/sketch/attributes/SoftmaxAttributes.h" - -#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h" - -namespace arm_compute -{ -/** Forward declaration */ -class ITensorInfo; -namespace experimental -{ -namespace dynamic_fusion -{ -/** Forward declaration */ -template <typename T> -class ArgumentPack; - -/** Forward declaration */ -class ClTemplateLogits1DMaxShiftExpSum; - -/** Component to calculate max-shifted exponentials and their sum - * - * 1D example: - * input: [x1, x2, ... , xn], shape: (1 x d) - * - * Let max(x1...xn) = m - * - * (output) sum: [exp(x1-m) + ... + exp(xn-m)], shape: (1 x 1) - * (output) dst: [exp(x1-m) ... exp(xn-m)], shape: (1 x d) - * - * This component is used by the softmax operator. The subsequent - * operation normalizes dst with sum, therefore the max-shifting - * since exp(m) will be cancelled in numerator and denominator. 
-*/ -class ClComponentLogits1DMaxShiftExpSum final : public IGpuKernelComponent -{ -public: - /** Attributes are a set of backend-agnostic parameters that define what a component does */ - using Attributes = SoftmaxAttributes; - - /** Validate the component - * - * @param[in] properties Component properties @ref Properties - * @param[in] tensors Tensor arguments to the component - * @param[in] attributes Component attributes @ref Attributes - * - * @return Status Validation results - * - * Tensor argument names: - * - ACL_SRC_0: Input - * - ACL_DST_0: Output - * - ACL_DST_1: Output - * - * Tensor argument constness: - * - ACL_SRC_0: Const - * - ACL_DST_0: Const - * - ACL_DST_1: Const - * - * Valid data layouts: - * - All - * - ** Valid data type configurations: - * |ACL_SRC_0 |ACL_DST_0 |ACL_DST_1 | - * |:----------|:----------|:----------| - * |F16 | F16 | F16 | - * |F32 | F32 | F32 | - */ - static Status - validate(const Properties &properties, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes); - - /** Constructor - * - * Similar to @ref ClComponentLogits1DMaxShiftExpSum::validate() - */ - ClComponentLogits1DMaxShiftExpSum(ComponentId id, - const Properties &properties, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes); - - /** Destructor */ - ~ClComponentLogits1DMaxShiftExpSum() override; - /** Prevent instances of this class from being copy constructed */ - ClComponentLogits1DMaxShiftExpSum(const ClComponentLogits1DMaxShiftExpSum &component) = delete; - /** Prevent instances of this class from being copied */ - ClComponentLogits1DMaxShiftExpSum &operator=(const ClComponentLogits1DMaxShiftExpSum &component) = delete; - /** Allow instances of this class to be move constructed */ - ClComponentLogits1DMaxShiftExpSum(ClComponentLogits1DMaxShiftExpSum &&component) = default; - /** Allow instances of this class to be moved */ - ClComponentLogits1DMaxShiftExpSum &operator=(ClComponentLogits1DMaxShiftExpSum &&component) = 
default; - /** Get template writer for the component */ - const IGpuTemplateComponentWriter *template_writer() const override; - /** Get component type */ - GpuComponentType type() const override - { - return GpuComponentType::Unfusable; - } - -private: - std::unique_ptr<ClTemplateLogits1DMaxShiftExpSum> _component_writer; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute - -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTLOGITS1DMAXSHIFTEXPSUM */ diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp deleted file mode 100644 index fb2544385c..0000000000 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp +++ /dev/null @@ -1,95 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/core/Validate.h" -#include "arm_compute/dynamic_fusion/sketch/attributes/SoftmaxAttributes.h" - -#include "src/core/CL/CLValidate.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -Status ClComponentLogits1DNorm::validate(const Properties &properties, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes) -{ - ARM_COMPUTE_UNUSED(properties, attributes); - - const ITensorInfo *src = tensors.get_const_tensor(TensorType::ACL_SRC_0); - const ITensorInfo *sum = tensors.get_const_tensor(TensorType::ACL_SRC_1); - const ITensorInfo *dst = tensors.get_const_tensor(TensorType::ACL_DST_0); - - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src); - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(sum); - ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(dst); - - // 1. 
Check validity - // All tensor infos are initialized - ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().total_size() == 0); - ARM_COMPUTE_RETURN_ERROR_ON(sum->tensor_shape().total_size() == 0); - ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0); - - // Check for mismatches in shapes and data types - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst, sum); - ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(src, dst); - - ARM_COMPUTE_RETURN_ERROR_ON(attributes.is_log_softmax() && !is_data_type_float(src->data_type())); - - // Device requirements are met - ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src); - - // 2. Check support level - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32); - - return Status{}; -} - -ClComponentLogits1DNorm::ClComponentLogits1DNorm(ComponentId id, - const Properties &properties, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes) - : IGpuKernelComponent{id, properties, tensors}, - _component_writer{std::make_unique<ClTemplateLogits1DNorm>(id, tensors, attributes)} -{ -} - -ClComponentLogits1DNorm::~ClComponentLogits1DNorm() -{ -} - -const IGpuTemplateComponentWriter *ClComponentLogits1DNorm::template_writer() const -{ - return _component_writer.get(); -} -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h deleted file mode 100644 index 74c0273604..0000000000 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h +++ /dev/null @@ -1,127 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTLOGITS1DNORM -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTLOGITS1DNORM - -#include "arm_compute/dynamic_fusion/sketch/attributes/SoftmaxAttributes.h" - -#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h" - -namespace arm_compute -{ -/** Forward declaration */ -class ITensorInfo; -namespace experimental -{ -namespace dynamic_fusion -{ -/** Forward declaration */ -template <typename T> -class ArgumentPack; - -/** Forward declaration */ -class ClTemplateLogits1DNorm; - -/** Component to calculate the final step of the Softmax Layer - * where each logit value is multiplied by the inverse of the sum of the logits. - * - * 1D example: - * - * (input) src: [x1 x2 ... xn], shape: (1 x d) - * (input) sum: [x1 + x2 + ... 
+ xn], shape: (1 x 1) - * (output) dst: [x1/sum x2/sum ... xn/sum], shape: (1 x d) - * - * This component is used by the softmax operator to get the final result. -*/ -class ClComponentLogits1DNorm final : public IGpuKernelComponent -{ -public: - /** Attributes are a set of backend-agnostic parameters that define what a component does */ - using Attributes = SoftmaxAttributes; - - /** Validate the component - * - * @param[in] properties Component properties @ref Properties - * @param[in] tensors Tensor arguments to the component - * @param[in] attributes Component attributes @ref Attributes - * - * @return Status Validation results - * - * Tensor argument names: - * - ACL_SRC_0: Input - * - ACL_SRC_1: Input - * - ACL_DST_0: Output - * - * Tensor argument constness: - * - ACL_SRC_0: Const - * - ACL_SRC_1: Const - * - ACL_DST_0: Const - * - * Valid data layouts: - * - All - * - ** Valid data type configurations: - * |ACL_SRC_0 |ACL_SRC_1 |ACL_DST_0 | - * |:----------|:----------|:----------| - * |F16 | F16 | F16 | - * |F32 | F32 | F32 | - */ - static Status - validate(const Properties &properties, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes); - - /** Constructor - * - * Similar to @ref ClComponentLogits1DNorm::validate() - */ - ClComponentLogits1DNorm(ComponentId id, - const Properties &properties, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes); - - /** Destructor */ - ~ClComponentLogits1DNorm() override; - /** Prevent instances of this class from being copy constructed */ - ClComponentLogits1DNorm(const ClComponentLogits1DNorm &component) = delete; - /** Prevent instances of this class from being copied */ - ClComponentLogits1DNorm &operator=(const ClComponentLogits1DNorm &component) = delete; - /** Allow instances of this class to be move constructed */ - ClComponentLogits1DNorm(ClComponentLogits1DNorm &&component) = default; - /** Allow instances of this class to be moved */ - ClComponentLogits1DNorm 
&operator=(ClComponentLogits1DNorm &&component) = default; - /** Get template writer for the component */ - const IGpuTemplateComponentWriter *template_writer() const override; - /** Get component type */ - GpuComponentType type() const override - { - return GpuComponentType::Unfusable; - } - -private: - std::unique_ptr<ClTemplateLogits1DNorm> _component_writer; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute - -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTLOGITS1DNORM */ diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp index f238d42d98..53ac8da41f 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,7 +21,6 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifdef ACL_INTERNAL_TEST_CKW_IN_DF #include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentMatMul.h" @@ -147,5 +146,3 @@ const IGpuCkwComponentDriver *ClComponentMatMul::ckw_component_driver() const } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute - -#endif // ACL_INTERNAL_TEST_CKW_IN_DF diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp index 5544963b3f..6e7243dc04 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp @@ -30,7 +30,6 @@ #include "src/core/CL/CLValidate.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h" #include "src/dynamic_fusion/utils/Utils.h" #include <memory> @@ -93,27 +92,16 @@ ClComponentPool2d::ClComponentPool2d(ComponentId id, const Attributes &attributes, const Settings &settings) : IGpuKernelComponent{id, properties, tensors}, -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - _component_writer{std::make_unique<ClTemplatePool2d>(id, tensors, attributes, settings)} -#else //ACL_INTERNAL_TEST_CKW_IN_DF _component_writer{std::make_unique<GpuCkwPool2d>(id, tensors, attributes, settings)} -#endif //ACL_INTERNAL_TEST_CKW_IN_DF { } ClComponentPool2d::~ClComponentPool2d() { } -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -const IGpuTemplateComponentWriter *ClComponentPool2d::template_writer() const -{ - return _component_writer.get(); -} -#else //ACL_INTERNAL_TEST_CKW_IN_DF const IGpuCkwComponentDriver *ClComponentPool2d::ckw_component_driver() const { return _component_writer.get(); } -#endif //ACL_INTERNAL_TEST_CKW_IN_DF } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h 
b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h index 98fed65004..d33e601f18 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -42,11 +42,7 @@ class ArgumentPack; class Pool2dAttributes; /** Forward declaration */ -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -class ClTemplatePool2d; -#else // ACL_INTERNAL_TEST_CKW_IN_DF class GpuCkwPool2d; -#endif // ACL_INTERNAL_TEST_CKW_IN_DF class ClComponentPool2d final : public IGpuKernelComponent { @@ -116,13 +112,9 @@ public: /** Allow instances of this class to be moved */ ClComponentPool2d &operator=(ClComponentPool2d &&component) = default; -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - /** Get template writer for the component */ - const IGpuTemplateComponentWriter *template_writer() const override; -#else // ACL_INTERNAL_TEST_CKW_IN_DF + /** Get GPU kernel writer for the component */ const IGpuCkwComponentDriver *ckw_component_driver() const override; -#endif // ACL_INTERNAL_TEST_CKW_IN_DF /** Get component type */ GpuComponentType type() const override @@ -131,11 +123,7 @@ public: } private: -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - std::unique_ptr<ClTemplatePool2d> _component_writer; -#else // ACL_INTERNAL_TEST_CKW_IN_DF std::unique_ptr<GpuCkwPool2d> _component_writer; -#endif // ACL_INTERNAL_TEST_CKW_IN_DF }; } // namespace dynamic_fusion } // namespace experimental diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp index 0ece9de970..dce85c424e 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -27,7 +27,6 @@ #include "arm_compute/core/Validate.h" #include "src/core/CL/CLValidate.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h" namespace arm_compute { @@ -54,15 +53,16 @@ Status ClComponentReshape::validate(const ArgumentPack<ITensorInfo> &tensors) ClComponentReshape::ClComponentReshape(ComponentId id, const Properties &properties, const ArgumentPack<ITensorInfo> &tensors) - : IGpuKernelComponent{id, properties, tensors}, _component_writer{std::make_unique<ClTemplateReshape>(id, tensors)} + : IGpuKernelComponent{id, properties, tensors} { } ClComponentReshape::~ClComponentReshape() { } -const IGpuTemplateComponentWriter *ClComponentReshape::template_writer() const +const IGpuCkwComponentDriver *ClComponentReshape::ckw_component_driver() const { - return _component_writer.get(); + /* NOT IMPLEMENTED */ + return nullptr; } } // namespace dynamic_fusion } // namespace experimental diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h index 78163d6603..fd0f966da1 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESHAPE -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESHAPE +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESHAPE_H +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESHAPE_H #include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h" @@ -85,8 +85,8 @@ public: ClComponentReshape(ClComponentReshape &&component) = default; /** Allow instances of this class to be moved */ ClComponentReshape &operator=(ClComponentReshape &&component) = default; - /** Get template writer for the component */ - const IGpuTemplateComponentWriter *template_writer() const override; + /** Get writer for the component */ + const IGpuCkwComponentDriver *ckw_component_driver() const override; /** Get component type */ GpuComponentType type() const override { @@ -94,10 +94,9 @@ public: } private: - std::unique_ptr<ClTemplateReshape> _component_writer; }; } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESHAPE */ +#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESHAPE_H diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp index b05eb04698..411eeca802 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. 
* * SPDX-License-Identifier: MIT * @@ -29,12 +29,7 @@ #include "src/core/CL/CLValidate.h" #include "src/core/utils/ScaleUtils.h" #include "src/dynamic_fusion/sketch/ArgumentPack.h" - -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.h" -#else // ACL_INTERNAL_TEST_CKW_IN_DF #include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwResize.h" -#endif // ACL_INTERNAL_TEST_CKW_IN_DF namespace arm_compute { @@ -43,11 +38,7 @@ namespace experimental namespace dynamic_fusion { /** Forward declaration */ -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -class ClTemplateResize; -#else // ACL_INTERNAL_TEST_CKW_IN_DF class GpuCkwResize; -#endif // ACL_INTERNAL_TEST_CKW_IN_DF Status ClComponentResize::validate(const IGpuKernelComponent::Properties &properties, const ArgumentPack<ITensorInfo> &tensors, @@ -82,11 +73,7 @@ ClComponentResize::ClComponentResize(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const ClComponentResize::Attributes &attributes) : IGpuKernelComponent{id, properties, tensors}, -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - _component_writer{std::make_unique<ClTemplateResize>(id, tensors, attributes)} -#else // ACL_INTERNAL_TEST_CKW_IN_DF _component_writer{std::make_unique<GpuCkwResize>(id, tensors, attributes)} -#endif // ACL_INTERNAL_TEST_CKW_IN_DF { } @@ -94,11 +81,7 @@ ClComponentResize::~ClComponentResize() { } -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -const IGpuTemplateComponentWriter *ClComponentResize::template_writer() const -#else // ACL_INTERNAL_TEST_CKW_IN_DF const IGpuCkwComponentDriver *ClComponentResize::ckw_component_driver() const -#endif // ACL_INTERNAL_TEST_CKW_IN_DF { return _component_writer.get(); } diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h index 29276c3257..9a1169c45f 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h +++ 
b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -22,8 +22,8 @@ * SOFTWARE. */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESIZE -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESIZE +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESIZE_H +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESIZE_H #include "arm_compute/dynamic_fusion/sketch/attributes/ResizeAttributes.h" @@ -42,11 +42,7 @@ template <typename T> class ArgumentPack; /** Forward declaration */ -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -class ClTemplateResize; -#else // ACL_INTERNAL_TEST_CKW_IN_DF class GpuCkwResize; -#endif // ACL_INTERNAL_TEST_CKW_IN_DF class ClComponentResize final : public IGpuKernelComponent { @@ -111,11 +107,7 @@ public: ClComponentResize &operator=(ClComponentResize &&component) = default; /** Get writer for the component */ -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - const IGpuTemplateComponentWriter *template_writer() const override; -#else // ACL_INTERNAL_TEST_CKW_IN_DF const IGpuCkwComponentDriver *ckw_component_driver() const override; -#endif // ACL_INTERNAL_TEST_CKW_IN_DF /** Get component type */ GpuComponentType type() const override @@ -124,15 +116,11 @@ public: } private: -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - std::unique_ptr<ClTemplateResize> _component_writer; -#else // ACL_INTERNAL_TEST_CKW_IN_DF std::unique_ptr<GpuCkwResize> _component_writer; -#endif // ACL_INTERNAL_TEST_CKW_IN_DF }; } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESIZE */ +#endif // ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESIZE_H diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp 
b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp index dcbecaff35..3db6c5cd2d 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,11 +24,7 @@ #include "ClComponentStore.h" #include "src/dynamic_fusion/sketch/ArgumentPack.h" -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h" -#else //ACL_INTERNAL_TEST_CKW_IN_DF #include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h" -#endif //ACL_INTERNAL_TEST_CKW_IN_DF #include <memory> @@ -46,22 +42,13 @@ Status ClComponentStore::validate(const Properties &properties, const ArgumentPa ClComponentStore::ClComponentStore(ComponentId id, const Properties &properties, const ArgumentPack<ITensorInfo> &tensors) - : IGpuKernelComponent{id, properties, tensors}, -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - _component_writer{std::make_unique<ClTemplateStore>(id, tensors)} -#else //ACL_INTERNAL_TEST_CKW_IN_DF - _component_writer{std::make_unique<GpuCkwStore>(id, tensors)} -#endif //ACL_INTERNAL_TEST_CKW_IN_DF + : IGpuKernelComponent{id, properties, tensors}, _component_writer{std::make_unique<GpuCkwStore>(id, tensors)} { } ClComponentStore::~ClComponentStore() { } -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -const IGpuTemplateComponentWriter *ClComponentStore::template_writer() const -#else //ACL_INTERNAL_TEST_CKW_IN_DF const IGpuCkwComponentDriver *ClComponentStore::ckw_component_driver() const -#endif //ACL_INTERNAL_TEST_CKW_IN_DF { return _component_writer.get(); } diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h index 948785c480..2c1dd0f6fc 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h +++ 
b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE +#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE_H +#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE_H #include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h" @@ -39,11 +39,7 @@ namespace dynamic_fusion /** Forward declaration */ template <typename T> class ArgumentPack; -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF -class ClTemplateStore; -#else //ACL_INTERNAL_TEST_CKW_IN_DF class GpuCkwStore; -#endif //ACL_INTERNAL_TEST_CKW_IN_DF class ClComponentStore final : public IGpuKernelComponent { @@ -88,11 +84,7 @@ public: /** Allow instances of this class to be moved */ ClComponentStore &operator=(ClComponentStore &&component) = default; /** Get writer for the component */ -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - const IGpuTemplateComponentWriter *template_writer() const override; -#else //ACL_INTERNAL_TEST_CKW_IN_DF const IGpuCkwComponentDriver *ckw_component_driver() const override; -#endif //ACL_INTERNAL_TEST_CKW_IN_DF /** Get component type */ GpuComponentType type() const override { @@ -100,13 +92,9 @@ public: } private: -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF - std::unique_ptr<ClTemplateStore> _component_writer; -#else //ACL_INTERNAL_TEST_CKW_IN_DF - std::unique_ptr<GpuCkwStore> _component_writer; -#endif //ACL_INTERNAL_TEST_CKW_IN_DF + std::unique_ptr<GpuCkwStore> _component_writer; }; } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE */ +#endif // 
ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE_H diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp index 697b7d4e1f..4d6e7f81bb 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2022-2023 Arm Limited. + * Copyright (c) 2022-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -30,7 +30,6 @@ #include "src/dynamic_fusion/sketch/ArgumentPack.h" #include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h" #include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h" namespace arm_compute { diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp index e24629a036..2997b28ec1 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuMatMul.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,7 +21,6 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifdef ACL_INTERNAL_TEST_CKW_IN_DF #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMatMul.h" @@ -244,4 +243,3 @@ ITensorInfo *GpuMatMul::create_op(GpuWorkloadSketch &sketch, } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute -#endif // ACL_INTERNAL_TEST_CKW_IN_DF diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp index 431c9110fc..d385752201 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp @@ -28,8 +28,6 @@ #include "src/common/utils/Log.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/dynamic_fusion/sketch/ArgumentPack.h" -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h" -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h" #include "src/dynamic_fusion/sketch/gpu/GpuOperatorProperties.h" #include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" @@ -88,9 +86,8 @@ Status GpuSoftmax::is_supported_op(const GpuWorkloadContext &context, arguments_norm.add_const_tensor(ACL_SRC_1, &sum); arguments_norm.add_const_tensor(ACL_DST_0, &dst_info_to_validate); - ARM_COMPUTE_RETURN_ON_ERROR( - ClComponentLogits1DMaxShiftExpSum::validate(properties, arguments_exp_sum, attributes)); - ARM_COMPUTE_RETURN_ON_ERROR(ClComponentLogits1DNorm::validate(properties, arguments_norm, attributes)); + ARM_COMPUTE_UNUSED(properties, attributes); + return Status(ErrorCode::RUNTIME_ERROR, "GpuSoftmax is not implemented"); } else { @@ -177,8 +174,8 @@ void GpuSoftmax::create_op(GpuWorkloadSketch &sketch, ITensorInfo *src, ITensorI arguments_norm.add_const_tensor(ACL_SRC_1, sum); arguments_norm.add_const_tensor(ACL_DST_0, dst); - comp_graph.add_new_component<ClComponentLogits1DMaxShiftExpSum>(properties, arguments_exp_sum, attributes); - comp_graph.add_new_component<ClComponentLogits1DNorm>(properties, 
arguments_norm, attributes); + // Add to component graph -- NOT IMPLEMENTED + ARM_COMPUTE_UNUSED(comp_graph, attributes); } } else diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp index bf0f274c5c..b9d01966b3 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2023 Arm Limited. + * Copyright (c) 2023-2024 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -31,7 +31,6 @@ #include "src/dynamic_fusion/sketch/ArgumentPack.h" #include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h" #include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h" namespace arm_compute { diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp deleted file mode 100644 index 775b0a0c8c..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp +++ /dev/null @@ -1,114 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "GpuKernelVariableTable.h" - -#include "arm_compute/core/CL/CLHelpers.h" -#include "arm_compute/core/ITensorInfo.h" - -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -void GpuKernelVariableTable::declare_variable(const GpuKernelComponentGroup &comp_group, - const ITensorInfo *tensor, - GpuKernelArgumentInfo argument_info, - const std::string &alias) -{ - ARM_COMPUTE_ERROR_ON_MSG(!tensor->has_valid_id(), "Tensor info with valid id expected"); - - // Do not re-declare if the variable associated with the tensor has already been declared - auto it = _vars.find(tensor->id()); - - if (it != _vars.end()) - { - ARM_COMPUTE_ERROR_ON(!(it->second.kernel_argument_info == argument_info)); - return; - } - - const auto target = comp_group.get_tile_for_tensor(tensor); - - if (target != tensor) - { - // If the tensor uses a shared tile, don't declare another variable. 
- it = _vars.find(target->id()); - - ARM_COMPUTE_ERROR_ON_MSG(it == _vars.end(), "The variable used for this tensor must have been declared."); - - _vars[tensor->id()] = it->second; - } - else - { - // Declare variable associated with the tensor - std::stringstream ss; - ss << alias << "_t" << abs(tensor->id()); - const auto uniq_name = ss.str(); - TensorVariable var{tensor->id(), uniq_name, argument_info}; - - _vars.emplace(tensor->id(), var); - } -} - -GpuKernelVariableTable::TensorVariable GpuKernelVariableTable::get_variable(const ITensorInfo *tensor) const -{ - const auto var = _vars.at(tensor->id()); - return var; -} - -GpuKernelVariableTable::VariableList -GpuKernelVariableTable::get_variable_list(const std::vector<const ITensorInfo *> &tensors) const -{ - VariableList vars{}; - for (const auto &tensor : tensors) - { - if (!tensor->has_valid_id()) - { - continue; - } - vars.push_back(get_variable(tensor)); - } - return vars; -} - -TagVal::TagVal(const GpuKernelVariableTable::TensorVariable &var) : value{var.uniq_name} -{ -} - -TagVal::TagVal(const std::string &val) : value{val} -{ -} - -TagVal::TagVal(const char *val) : value{std::string(val)} -{ -} - -TagVal::TagVal(const DataType &data_type) : value{get_cl_type_from_data_type(data_type)} -{ -} -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h b/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h deleted file mode 100644 index c17f131ada..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_GPUKERNELVARIABLETABLE -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_GPUKERNELVARIABLETABLE - -#include "arm_compute/core/ITensorInfo.h" - -#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" -#include "support/AclRequires.h" -#include "support/StringSupport.h" - -#include <set> -#include <string> -#include <type_traits> -#include <unordered_map> - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class GpuKernelComponentGroup; - -/** A table of all the variables used in the kernel. - * Each kernel has exactly one variable table. 
- */ -class GpuKernelVariableTable -{ -public: - /** A tensor variable whose main purposes are: - * - Hold the newly assigned @ref GpuKernelArgumentInfo for the associated tensor info - * - Hold the generated variable name for the associated tensor info - */ - struct TensorVariable - { - public: - TensorVariable() = default; - TensorVariable(const TensorVariable &) = default; - TensorVariable &operator=(const TensorVariable &) = default; - ITensorInfo::Id id{ITensorInfo::invalid_tensor_id}; - std::string uniq_name{"empty"}; // Unique name, also the final variable name used in the built code - GpuKernelArgumentInfo kernel_argument_info{}; - bool has_valid_id() const - { - return id != ITensorInfo::invalid_tensor_id; - } - }; - using VariableList = std::vector<TensorVariable>; - -public: - /** Declare a @ref TensorVariable for a corresponding tensor info. - * - * @param[in] comp_group Component group the tensor belongs to - * @param[in] tensor Tensor info with which the new variable is associated - * @param[in] argument_info Kernel argument information - * @param[in] alias Alias for the variable. 
Will be used as part of the variable name - */ - void declare_variable(const GpuKernelComponentGroup &comp_group, - const ITensorInfo *tensor, - GpuKernelArgumentInfo argument_info, - const std::string &alias = "unnamed"); - /** Get the @ref TensorVariable associated with @p tensor - * - * @param[in] tensor Tensor info to be queried - * - * @return TensorVariable - */ - TensorVariable get_variable(const ITensorInfo *tensor) const; - /** Get the @ref TensorVariable list associated with @p tensors - * @note Empty tensors are skipped - * - * @param[in] tensors List of tensor infos to be queried - * - * @return VariableList - */ - VariableList get_variable_list(const std::vector<const ITensorInfo *> &tensors) const; - -private: - std::map<ITensorInfo::Id, TensorVariable> _vars{}; -}; - -/** A tag value will substitute a tag in a string template during its instantiation */ -struct TagVal -{ - /** Default constructor */ - TagVal() = default; - /** Construct a @ref TagVal from a @ref GpuKernelVariableTable::TensorVariable */ - TagVal(const GpuKernelVariableTable::TensorVariable &var); - /** Construct a @ref TagVal from an integral type */ - template <typename T, ARM_COMPUTE_REQUIRES_TA(std::is_integral<T>::value)> - TagVal(T val) : value{support::cpp11::to_string(val)} - { - } - /** Construct a @ref TagVal from a string */ - TagVal(const std::string &val); - /** Construct a @ref TagVal from a c-style string */ - TagVal(const char *val); - /** Construct a @ref TagVal from a @ref DataType */ - TagVal(const DataType &data_type); - /** Get the value of the TagVal as a converted string */ - std::string value{}; -}; - -/** A tag used in a string template is a placeholder string to be substituted by real values during template instantiation */ -using Tag = std::string; - -/** Tag lookup table. 
It is used to instantiate a string template */ -using TagLUT = std::unordered_map<Tag, TagVal>; - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_GPUKERNELVARIABLETABLE */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h b/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h deleted file mode 100644 index 9d0b4f592a..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h +++ /dev/null @@ -1,140 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_IGPUTEMPLATECOMPONENTWRITER -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_IGPUTEMPLATECOMPONENTWRITER - -#include "arm_compute/core/CL/CLCompileContext.h" -#include "arm_compute/core/ITensorInfo.h" -#include "arm_compute/core/Window.h" - -#include "src/dynamic_fusion/sketch/ArgumentPack.h" -#include "src/dynamic_fusion/sketch/gpu/components/Types.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -/** Forward declaration */ -class GpuKernelComponentGroup; -class GpuKernelVariableTable; - -/** An interface used by @ref ClTemplateWriter to write source code for a kernel component - */ -class IGpuTemplateComponentWriter -{ -public: - using ComponentGroup = GpuKernelComponentGroup; - - /**For now all kernel intermeditate/destination tensors are expected to be of type Tensor_4D_t_Buffer*/ - static constexpr GpuKernelArgumentInfo::Type common_tensor_type = GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer; - -public: - /** Constructor - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - */ - IGpuTemplateComponentWriter(ComponentId id, const ArgumentPack<ITensorInfo> &tensors) : _id{id}, _tensors{tensors} - { - } - /** Destructor */ - virtual ~IGpuTemplateComponentWriter() - { - } - /** Generate kernel component name */ - virtual std::string get_name() const = 0; - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ - virtual std::string get_component_code(const ComponentGroup &comp_group) const = 0; - /** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - virtual void 
declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const = 0; - /** Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - virtual TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const = 0; - /** Generate additional macros used in the component */ - virtual std::string get_additional_macros() const - { - return ""; - } - /** Generate the build options used in the component - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return CLBuildOptions Build options - */ - virtual CLBuildOptions get_build_options(const ComponentGroup &comp_group) const - { - ARM_COMPUTE_UNUSED(comp_group); - return CLBuildOptions{}; - } - /** Generate the component config id string used for tuning */ - virtual std::string get_config_id() const - { - return ""; - } - /** Generate the header list used in the component */ - virtual std::set<std::string> get_headers_list() const - { - return std::set<std::string>{}; - } - /** Generate the execution window for the component */ - virtual Window get_window() const - { - return Window{}; - } - /** Get tensor arguments */ - ArgumentPack<ITensorInfo> tensors() const - { - return _tensors; - } - /** Get component id */ - ComponentId id() const - { - return _id; - } - -private: - ComponentId _id{-1}; - ArgumentPack<ITensorInfo> _tensors{}; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_IGPUTEMPLATECOMPONENTWRITER */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp deleted file mode 100644 index c165fb5f33..0000000000 --- 
a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp +++ /dev/null @@ -1,181 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "ClTemplateActivation.h" - -#include "arm_compute/core/utils/ActivationFunctionUtils.h" -#include "arm_compute/core/utils/helpers/AdjustVecSize.h" -#include "arm_compute/core/utils/StringUtils.h" - -#include "src/core/helpers/WindowHelpers.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" -#include "support/StringSupport.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -ClTemplateActivation::ClTemplateActivation(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes) - : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}, _attributes{attributes} -{ - _src = this->tensors().get_const_tensor(TensorType::ACL_SRC); - _dst = this->tensors().get_const_tensor(TensorType::ACL_DST); - ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst); -} - -std::string ClTemplateActivation::get_name() const -{ - return "activation"; -} - -std::string ClTemplateActivation::get_component_code(const ComponentGroup &comp_group) const -{ - std::string code; - const bool is_root = (comp_group.get_root_component()->id() == this->id()); - - code = R"_( -//------------------ START KERNEL {{meta_kernel_id}} --------------------- -)_"; - if (is_root) - { - code += R"_( -// IN(src) {{src}} -// OUT(dst, accum) {{dst}} - -TILE({{DATA_TYPE}}, M0, N0, {{src}}); -TILE(uint, M0, 1, g_dst_indirect_y); -{ - {{src}}_offset_first_element_in_bytes += g_ind_2 * {{src}}_stride_z; - - T_LOAD({{DATA_TYPE}}, M0, N0, {{TENSOR_TYPE}}, {{src}}, g_ind_0, g_ind_1, 1, {{src}}_stride_y, {{src}}); - - T_ACTIVATION({{DATA_TYPE}}, M0, N0, {{ACT}}, {{A_VAL}}, {{B_VAL}}, {{src}}, {{dst}}); -} - -LOOP_UNROLLING(int, i, 0, 1, M0, -{ - g_dst_indirect_y[i].v = (uint)min((int)(g_ind_1 + i), (int)({{arg_dst}}_w) - 1); - g_dst_indirect_y[i].v += (int)(g_ind_2 % {{arg_dst}}_h) * (int)({{arg_dst}}_w); - g_dst_indirect_y[i].v += (int)(g_ind_2 / {{arg_dst}}_h) * (int)({{arg_dst}}_w * {{arg_dst}}_h); -}) -)_"; - } - else - { - code += 
R"_( -// IN/OUT(src, accum) {{src}} - -{ - T_ACTIVATION({{DATA_TYPE}}, M0, N0, {{ACT}}, {{A_VAL}}, {{B_VAL}}, {{src}}, {{dst}}); -} -)_"; - } - code += R"_( -//------------------ END KERNEL {{meta_kernel_id}} --------------------- -)_"; - return code; -} - -void ClTemplateActivation::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "src"); - - vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "dst"); -} - -TagLUT ClTemplateActivation::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - TagLUT lut{}; - // Arguments and global shared variables - lut["src"] = vtable.get_variable(_src); - lut["dst"] = vtable.get_variable(_dst); - - const auto dst_argument = vtable.get_variable(comp_group.get_any_dst_tensor()); - lut["arg_dst"] = dst_argument.uniq_name; - - // Local build options - lut["meta_kernel_id"] = id(); - lut["DATA_TYPE"] = get_cl_type_from_data_type(_src->data_type()); - lut["TENSOR_TYPE"] = "BUFFER"; - - const auto f_act = lower_string(string_from_activation_func(_attributes.activation())); - - lut["ACT"] = f_act; - lut["A_VAL"] = float_to_string_with_full_precision(_attributes.a()); - lut["B_VAL"] = float_to_string_with_full_precision(_attributes.b()); - - return lut; -} - -CLBuildOptions ClTemplateActivation::get_build_options(const ComponentGroup &comp_group) const -{ - /// NOTE: For now tile sizes (n0, m0) are set by the execution window. 
This may change in the future - const auto root_window = comp_group.get_root_component()->template_writer()->get_window(); - const unsigned int n0 = root_window.x().step(); - const unsigned int m0 = root_window.y().step(); - const unsigned int partial_store_n0 = _dst->dimension(0) % n0; - - CLBuildOptions build_opts; - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0)); - - return build_opts; -} - -std::string ClTemplateActivation::get_config_id() const -{ - std::string config_id{}; - config_id += "activation_"; - config_id += lower_string(string_from_data_type(_src->data_type())); - config_id += "_"; - config_id += support::cpp11::to_string(_src->dimension(0)); - config_id += "_"; - config_id += support::cpp11::to_string(_src->dimension(1)); - return config_id; -} - -std::set<std::string> ClTemplateActivation::get_headers_list() const -{ - return std::set<std::string>{"helpers.h", "tile_helpers.h", "activation_float_helpers.h"}; -} - -Window ClTemplateActivation::get_window() const -{ - ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized"); - const unsigned int n0 = adjust_vec_size(16 / _dst->element_size(), _dst->dimension(0)); - Window win = calculate_max_window(*_dst, Steps(n0)); - return win.collapse(win, Window::DimZ); -} - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h deleted file mode 100644 index 88ee370342..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEACTIVATION -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEACTIVATION - -#include "arm_compute/core/experimental/Types.h" -#include "arm_compute/function_info/ActivationLayerInfo.h" - -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClTemplateActivation final : public IGpuTemplateComponentWriter -{ -public: - using Attributes = ClComponentActivation::Attributes; - - /** Constructor - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - * @param[in] attributes Component attributes - */ - ClTemplateActivation(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes); - - /** Destructor */ - ~ClTemplateActivation() override = default; - - /** Prevent instances of this class from being copy constructed */ - ClTemplateActivation(const ClTemplateActivation &activation) = delete; - - /** Prevent instances of this class from being copied */ - ClTemplateActivation &operator=(const ClTemplateActivation &activation) = delete; - - /** Allow instances of this class to be move constructed */ - ClTemplateActivation(ClTemplateActivation &&activation) = default; - - /** Allow instances of this class to be moved */ - ClTemplateActivation &operator=(ClTemplateActivation &&activation) = default; - - /** Generate kernel component name */ - std::string get_name() const override; - - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ - std::string get_component_code(const ComponentGroup &comp_group) const override; - - 
/** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - - /** Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - - /** Generate the build options used in the component - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return CLBuildOptions Build options - */ - CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override; - - /** Generate the component config id string used for tuning */ - std::string get_config_id() const override; - - /** Generate the header list used in the component */ - std::set<std::string> get_headers_list() const override; - - /** Generate the execution window for the component */ - Window get_window() const override; - -private: - const ITensorInfo *_src; - const ITensorInfo *_dst; - Attributes _attributes; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEACTIVATION */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp deleted file mode 100644 index 0da3a73801..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp +++ /dev/null @@ -1,212 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "ClTemplateCast.h" - -#include "arm_compute/core/utils/helpers/AdjustVecSize.h" -#include "arm_compute/core/utils/StringUtils.h" - -#include "src/core/helpers/WindowHelpers.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -ClTemplateCast::ClTemplateCast(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes) - : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}, _attributes{attributes} -{ - _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); - _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); - - ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst); -} - -std::string ClTemplateCast::get_name() const -{ - const size_t src_size = data_size_from_type(_src->data_type()); - const size_t dst_size = data_size_from_type(_dst->data_type()); - - return (src_size >= dst_size) ? "cast_down" : "cast_up"; -} - -std::string ClTemplateCast::get_component_code(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - const std::string kernel_name = get_name(); - const auto is_root = (comp_group.get_root_component()->id() == this->id()); - - std::string code = R"_( -//------------------ START KERNEL {{meta_kernel_id}} CAST --------------------- -)_"; - - if (is_root) - { - code += R"_( -// IN_0(src) {{src}} -// OUT(dst, accum) {{dst}} - -TILE(uint, M0, 1, g_dst_indirect_y); -{ - {{src}}_offset_first_element_in_bytes += get_global_id(2) * {{src}}_stride_z; - - TILE({{DATA_TYPE_IN}}, M0, N0, {{tmp}}); - T_LOAD({{DATA_TYPE_IN}}, M0, N0, BUFFER, {{src}}, g_ind_0, g_ind_1, 1, {{src}}_stride_y, {{tmp}}); -)_"; - } - - code += R"_( - LOOP_UNROLLING(int, m0, 0, 1, M0, - { -)_"; - - if (kernel_name == "cast_down" && is_data_type_quantized(_src->data_type())) - { - code += R"_( - {{tmp}}[m0].v ^= (VEC_DATA_TYPE({{DATA_TYPE_IN}}, N0))0x80; -)_"; - } - - if (kernel_name == "cast_down" && - 
(is_data_type_float(_src->data_type()) || _attributes.convert_policy() == ConvertPolicy::SATURATE)) - { - code += R"_( - {{dst}}[m0].v = CONVERT_SAT({{tmp}}[m0].v, VEC_DATA_TYPE({{DATA_TYPE_OUT}}, N0)); -)_"; - } - else - { - code += R"_( - {{dst}}[m0].v = CONVERT({{tmp}}[m0].v, VEC_DATA_TYPE({{DATA_TYPE_OUT}}, N0)); -)_"; - } - - code += R"_( - }) -)_"; - - if (is_root) - { - code += R"_( - LOOP_UNROLLING(int, i, 0, 1, M0, - { - g_dst_indirect_y[i].v = (uint)min((int)(g_ind_1 + i), (int)({{arg_dst}}_w) - 1); - g_dst_indirect_y[i].v += (int)(g_ind_2 % {{arg_dst}}_h) * (int)({{arg_dst}}_w); - g_dst_indirect_y[i].v += (int)(g_ind_2 / {{arg_dst}}_h) * (int)({{arg_dst}}_w * {{arg_dst}}_h); - }) -} -)_"; - } - - code += R"_( -//------------------ END KERNEL {{meta_kernel_id}} CAST --------------------- -)_"; - - return code; -} - -void ClTemplateCast::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "src"); - - vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "dst"); -} - -TagLUT ClTemplateCast::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - const auto is_root = (comp_group.get_root_component()->id() == this->id()); - - TagLUT lut{}; - - // Arguments and global shared variables - lut["src"] = vtable.get_variable(_src); - lut["dst"] = vtable.get_variable(_dst); - lut["tmp"] = (is_root) ? 
lut["src"].value + "_in_data" : lut["src"]; - - const auto dst_argument = vtable.get_variable(comp_group.get_any_dst_tensor()); - lut["arg_dst"] = dst_argument.uniq_name; - - // Local build options - lut["meta_kernel_id"] = id(); - - lut["DATA_TYPE_IN"] = get_cl_type_from_data_type(_src->data_type()); - lut["DATA_TYPE_OUT"] = get_cl_type_from_data_type(_dst->data_type()); - - return lut; -} - -CLBuildOptions ClTemplateCast::get_build_options(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - const auto root_window = comp_group.get_root_component()->template_writer()->get_window(); - const unsigned int n0 = root_window.x().step(); - const unsigned int m0 = root_window.y().step(); - - // Set build options - CLBuildOptions build_opts{}; - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(_src->dimension(0) % n0)); - build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); - - return build_opts; -} - -std::string ClTemplateCast::get_config_id() const -{ - std::string config_id{}; - - config_id += "_"; - config_id += lower_string(string_from_data_type(_src->data_type())); - config_id += "_"; - config_id += lower_string(string_from_data_type(_dst->data_type())); - config_id += "_"; - config_id += support::cpp11::to_string(_src->dimension(0)); - config_id += "_"; - config_id += support::cpp11::to_string(_src->dimension(1)); - - return config_id; -} - -std::set<std::string> ClTemplateCast::get_headers_list() const -{ - return std::set<std::string>{"helpers.h", "tile_helpers.h"}; -} - -Window ClTemplateCast::get_window() const -{ - ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized"); - - const unsigned int n0 = adjust_vec_size(16 / _dst->element_size(), _dst->dimension(0)); - Window win = calculate_max_window(*_dst, Steps(n0)); - return win.collapse(win, Window::DimZ); -} - -} // namespace 
dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.h deleted file mode 100644 index 3adca4edc9..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.h +++ /dev/null @@ -1,103 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATECAST -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATECAST - -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClTemplateCast final : public IGpuTemplateComponentWriter -{ -public: - using Attributes = ClComponentCast::Attributes; - - /** Constructor - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - * @param[in] attributes Component attributes - */ - ClTemplateCast(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes); - /** Prevent instances of this class from being copy constructed */ - ClTemplateCast(const ClTemplateCast &cast) = delete; - /** Prevent instances of this class from being copied */ - ClTemplateCast &operator=(const ClTemplateCast &cast) = delete; - /** Allow instances of this class to be move constructed */ - ClTemplateCast(ClTemplateCast &&cast) = default; - /** Allow instances of this class to be moved */ - ClTemplateCast &operator=(ClTemplateCast &&cast) = default; - /** Generate kernel component name */ - std::string get_name() const override; - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ - std::string get_component_code(const ComponentGroup &comp_group) const override; - /** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** 
Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the build options used in the component - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return CLBuildOptions Build options - */ - CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override; - /** Generate the component config id string used for tuning */ - std::string get_config_id() const override; - /** Generate the header list used in the component */ - std::set<std::string> get_headers_list() const override; - /** Generate the execution window for the component */ - Window get_window() const override; - -private: - const ITensorInfo *_src; - const ITensorInfo *_dst; - Attributes _attributes; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute - -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATECAST */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp deleted file mode 100644 index 8380620ab2..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp +++ /dev/null @@ -1,364 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "ClTemplateDepthwiseConv2d.h" - -#include "src/core/helpers/WindowHelpers.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -ClTemplateDepthwiseConv2d::ClTemplateDepthwiseConv2d(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes, - const Settings &settings) - : IGpuTemplateComponentWriter{id, tensors}, - _src{}, - _weight{}, - _bias{}, - _dst{}, - _attributes{attributes}, - _settings{settings} -{ - _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); - _weight = this->tensors().get_const_tensor(TensorType::ACL_SRC_1); - if (this->tensors().get_const_tensor(TensorType::ACL_SRC_2)) - { - _bias = this->tensors().get_const_tensor(TensorType::ACL_SRC_2); - } - _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); - ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _weight, _dst); -} - -std::string ClTemplateDepthwiseConv2d::get_name() const -{ - return "depthwise_conv2d"; -} - -std::string ClTemplateDepthwiseConv2d::get_component_code(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - constexpr int height_idx = 2; // Data Layout is NHWC - - std::string code = R"_( -//------------------ START KERNEL {{meta_kernel_id}} --------------------- -// IN_0(src) {{src}} -// IN_1(wei) {{weight}} -)_"; - - if (_bias != nullptr && _bias->has_valid_id()) - { - code += R"_( -// IN_1(bia) {{bias}} -)_"; - } - - code += R"_( -// OUT(dst, accum) {{dst}} - -TILE(uint, M0, 1, g_dst_indirect_y); - -{ -#define _IWEI_WIDTH {{WEI_WIDTH}} -#define _IWEI_HEIGHT {{WEI_HEIGHT}} -#define _IDST_WIDTH {{arg_dst}}_w -#define _IDST_HEIGHT {{arg_dst}}_h -#define _IM0_A M0_A -#define _IN0_A N0_A -#define _IM0_B _IWEI_WIDTH -#define _IN0_B N0 -#define _IBOUNDARY_CHECK (!((_IWEI_WIDTH == 1 && _IWEI_HEIGHT == 1 && {{PAD_LEFT}} == 0 && {{PAD_TOP}} == 0 && M0 == 1))) -)_"; - - code += R"_( - const int yo = g_ind_2 % 
{{arg_dst}}_h; - const int bout = g_ind_2 / {{arg_dst}}_h; -)_"; - - code += R"_( - - int xi = g_ind_1 * {{STRIDE_X}}; - int yi = yo * {{STRIDE_Y}}; - xi -= {{PAD_LEFT}}; - yi -= {{PAD_TOP}}; - - LOOP_UNROLLING(int, i, 0, 1, M0, - { - {{dst}}[i].v = 0; - }) -)_"; - - if (_weight->dimension(height_idx) < 5) - { - code += R"_( - LOOP_UNROLLING(int, yk, 0, 1, _IWEI_HEIGHT, -)_"; - } - else - { - code += R"_( - for(int yk = 0; yk < _IWEI_HEIGHT; ++yk) -)_"; - } - - code += R"_( - { - TILE({{SRC_DATA_TYPE}}, _IM0_A, _IN0_A, a); - - LOOP_UNROLLING(int, i, 0, 1, _IM0_A, - { - a[i].v = 0; - }) - - T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, _IM0_A, _IN0_A, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi + yk * {{DILATION_Y}}, xi, (g_ind_0 / {{DEPTH_MULTIPLIER}}), {{src}}_w, {{src}}_h, {{DILATION_X}}, 1, _IBOUNDARY_CHECK, a); - - TILE({{WEI_DATA_TYPE}}, _IM0_B, _IN0_B, b); - - T_LOAD({{WEI_DATA_TYPE}}, _IM0_B, _IN0_B, {{WEI_TENSOR_TYPE}}, {{weight}}, g_ind_0, yk * _IM0_B, 1, {{weight}}_stride_y, b); - - LOOP_UNROLLING(int, m0, 0, 1, M0, - { - LOOP_UNROLLING(int, xk, 0, 1, _IWEI_WIDTH, - { -)_"; - - if (!_settings.is_fma_available()) - { - code += R"_( - {{dst}}[m0].v += a[xk + m0].v * b[xk].v; -)_"; - } - else - { - code += R"_( - {{dst}}[m0].v = fma(a[xk + m0].v, b[xk].v, {{dst}}[m0].v); -)_"; - } - - code += R"_( - }) - }) - } -)_"; - - if (_weight->dimension(height_idx) < 5) - { - code += R"_( - ) -)_"; - } - - if (_bias && _bias->has_valid_id()) - { - code += R"_( - TILE({{BIA_DATA_TYPE}}, 1, N0, {{bias}}); - - T_LOAD({{BIA_DATA_TYPE}}, 1, N0, BUFFER, {{bias}}, g_ind_0, 0, 0, 0, {{bias}}); - - T_ELTWISE_BROADCAST_ADD_X({{ACC_DATA_TYPE}}, M0, N0, {{dst}}, {{bias}}, {{dst}}); -)_"; - } - - code += R"_( - LOOP_UNROLLING(int, i, 0, 1, M0, - { - g_dst_indirect_y[i].v = (uint)min((int)(g_ind_1 + i), (int)({{arg_dst}}_w) - 1); - g_dst_indirect_y[i].v += (int)(g_ind_2 % {{arg_dst}}_h) * (int)({{arg_dst}}_w); - g_dst_indirect_y[i].v += (int)(g_ind_2 / {{arg_dst}}_h) * 
(int)({{arg_dst}}_w * {{arg_dst}}_h); - }) -} -//------------------ END KERNEL {{meta_kernel_id}} --------------------- -)_"; - - return code; -} - -void ClTemplateDepthwiseConv2d::declare_variables(GpuKernelVariableTable &vtable, - const ComponentGroup &comp_group) const -{ - const GpuKernelArgumentInfo::Type input_type = _settings.export_input_to_cl_image() - ? GpuKernelArgumentInfo::Type::Tensor_4D_t_Image - : GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer; - - vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(input_type), "src"); - - const GpuKernelArgumentInfo::Type weight_type = _settings.export_weights_to_cl_image() - ? GpuKernelArgumentInfo::Type::Tensor_4D_t_Image - : GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer; - - vtable.declare_variable(comp_group, _weight, GpuKernelArgumentInfo(weight_type), "weight"); - - if (_bias != nullptr && _bias->has_valid_id()) // optional bias - { - vtable.declare_variable(comp_group, _bias, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Vector), "bias"); - } - vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "dst"); -} - -TagLUT ClTemplateDepthwiseConv2d::get_tag_lut(const GpuKernelVariableTable &vtable, - const ComponentGroup &comp_group) const -{ - TagLUT lut{}; - - // Arguments and global shared variables - lut["src"] = vtable.get_variable(_src); - lut["weight"] = vtable.get_variable(_weight); - - if (_bias != nullptr && _bias->has_valid_id()) // optional bias - { - lut["bias"] = vtable.get_variable(_bias); - lut["BIA_DATA_TYPE"] = get_cl_type_from_data_type(_bias->data_type()); - } - lut["dst"] = vtable.get_variable(_dst); - - const auto dst_argument = vtable.get_variable(comp_group.get_any_dst_tensor()); - lut["arg_dst"] = dst_argument.uniq_name; - - // Local build options - lut["meta_kernel_id"] = id(); - lut["ACC_DATA_TYPE"] = _src->data_type(); - lut["SRC_DATA_TYPE"] = _src->data_type(); - lut["WEI_DATA_TYPE"] = 
_weight->data_type(); - - switch (vtable.get_variable(_src).kernel_argument_info.type) - { - case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D: - case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D: - case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image: - lut["SRC_TENSOR_TYPE"] = "IMAGE"; - break; - default: - lut["SRC_TENSOR_TYPE"] = "BUFFER"; - break; - } - - switch (vtable.get_variable(_weight).kernel_argument_info.type) - { - case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D: - case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D: - case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image: - lut["WEI_TENSOR_TYPE"] = "IMAGE"; - break; - default: - lut["WEI_TENSOR_TYPE"] = "BUFFER"; - break; - } - - // Data Layout is NHWC - constexpr int width_idx = 1; - constexpr int height_idx = 2; - - lut["WEI_WIDTH"] = _weight->dimension(width_idx); - lut["WEI_HEIGHT"] = _weight->dimension(height_idx); - - lut["STRIDE_X"] = _attributes.stride().x(); - lut["STRIDE_Y"] = _attributes.stride().y(); - - lut["PAD_LEFT"] = _attributes.pad().left; - lut["PAD_TOP"] = _attributes.pad().top; - - lut["DILATION_X"] = _attributes.dilation().x(); - lut["DILATION_Y"] = _attributes.dilation().y(); - - lut["DEPTH_MULTIPLIER"] = _attributes.depth_multiplier(); - - return lut; -} - -CLBuildOptions ClTemplateDepthwiseConv2d::get_build_options(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - constexpr unsigned int width_idx = 1; // Data Layout is NHWC - - const unsigned int n0 = _settings.n0(); - const unsigned int m0 = _settings.m0(); - const unsigned int m0_a = _weight->dimension(width_idx) + m0 - 1; - const unsigned int n0_a = _attributes.depth_multiplier() > 1 ? 
1 : n0; - const unsigned int partial_store_n0 = _dst->dimension(0) % n0; - - CLBuildOptions build_opts{}; - - if (_settings.fast_relaxed_math()) - { - build_opts.add_option("-cl-fast-relaxed-math"); - } - else - { - // -cl-fast-relaxed-math also sets -cl-finite-math-only and -cl-unsafe-math-optimizations - // to disable -cl-finite-math-only, we only include -cl-unsafe-math-optimizations - build_opts.add_option("-cl-unsafe-math-optimizations"); - } - - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); - build_opts.add_option("-DN0_A=" + support::cpp11::to_string(n0_a)); - build_opts.add_option("-DM0_A=" + support::cpp11::to_string(m0_a)); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0)); - - return build_opts; -} - -std::string ClTemplateDepthwiseConv2d::get_config_id() const -{ - std::string config_id{}; - - config_id += support::cpp11::to_string(_src->dimension(0)); - config_id += "_"; - config_id += support::cpp11::to_string(_src->dimension(1)); - config_id += "_"; - config_id += support::cpp11::to_string(_src->dimension(2)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(0)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(1)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(2)); - config_id += "_"; - config_id += string_from_data_type(_src->data_type()); - - return config_id; -} - -std::set<std::string> ClTemplateDepthwiseConv2d::get_headers_list() const -{ - return std::set<std::string>{"helpers.h", "tile_helpers.h"}; -} - -Window ClTemplateDepthwiseConv2d::get_window() const -{ - ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized"); - - Window win = calculate_max_window(*_dst, Steps(_settings.n0(), _settings.m0())); - return win.collapse(win, Window::DimZ); -} - -} // namespace dynamic_fusion -} // 
namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h deleted file mode 100644 index 5d04c687c3..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h +++ /dev/null @@ -1,112 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDEPTHWISECONV2D -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDEPTHWISECONV2D - -#include "arm_compute/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.h" - -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClTemplateDepthwiseConv2d final : public IGpuTemplateComponentWriter -{ -public: - using Attributes = ClComponentDepthwiseConv2d::Attributes; - using Settings = ClComponentDepthwiseConv2d::Settings; - /** Constructor - * - * Similar to @ref ClComponentDepthwiseConv2d::validate() - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - * @param[in] attributes Component attributes - * @param[in] settings Component settings - */ - ClTemplateDepthwiseConv2d(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes, - const Settings &settings); - /** Prevent instances of this class from being copy constructed */ - ClTemplateDepthwiseConv2d(const ClTemplateDepthwiseConv2d &depthwise_conv2d) = delete; - /** Prevent instances of this class from being copied */ - ClTemplateDepthwiseConv2d &operator=(const ClTemplateDepthwiseConv2d &depthwise_conv2d) = delete; - /** Allow instances of this class to be move constructed */ - ClTemplateDepthwiseConv2d(ClTemplateDepthwiseConv2d &&depthwise_conv2d) = default; - /** Allow instances of this class to be moved */ - ClTemplateDepthwiseConv2d &operator=(ClTemplateDepthwiseConv2d &&depthwise_conv2d) = default; - /** Generate kernel component name */ - std::string get_name() const override; - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string 
Component code - */ - std::string get_component_code(const ComponentGroup &comp_group) const override; - /** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the build options used in the component - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return CLBuildOptions Build options - */ - CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override; - /** Generate the component config id string used for tuning */ - std::string get_config_id() const override; - /** Generate the header list used in the component */ - std::set<std::string> get_headers_list() const override; - /** Generate the execution window for the component */ - Window get_window() const override; - -private: - const ITensorInfo *_src; - const ITensorInfo *_weight; - const ITensorInfo *_bias; - const ITensorInfo *_dst; - Attributes _attributes; - Settings _settings; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDEPTHWISECONV2D */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp deleted file mode 100644 index f6a7a58d1d..0000000000 --- 
a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp +++ /dev/null @@ -1,393 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "ClTemplateDirectConv2d.h" - -#include "arm_compute/core/utils/helpers/AdjustVecSize.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/core/utils/StringUtils.h" - -#include "src/core/helpers/WindowHelpers.h" -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" -#include "support/StringSupport.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -ClTemplateDirectConv2d::ClTemplateDirectConv2d(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes, - const Settings &settings) - : IGpuTemplateComponentWriter{id, tensors}, - _src{}, - _weight{}, - _bias{}, - _dst{}, - _attributes{attributes}, - _settings{settings} -{ - _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); - _weight = this->tensors().get_const_tensor(TensorType::ACL_SRC_1); - if (this->tensors().get_const_tensor(TensorType::ACL_SRC_2)) - { - _bias = this->tensors().get_const_tensor(TensorType::ACL_SRC_2); - } - _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); - ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _weight, _dst); -} - -std::string ClTemplateDirectConv2d::get_name() const -{ - return "direct_conv2d"; -} - -std::string ClTemplateDirectConv2d::get_component_code(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - const auto channel_idx = get_data_layout_dimension_index(_src->data_layout(), DataLayoutDimension::CHANNEL); - const auto k0 = adjust_vec_size(_settings.direct_conv_descriptor().k0, _src->dimension(channel_idx)); - const bool leftover_loop = (_src->dimension(channel_idx) % k0) != 0; - - std::string code = R"_( -//------------------ START KERNEL {{meta_kernel_id}} --------------------- -// IN_0(src) {{src}} -// IN_1(wei) {{weight}} -)_"; - if (_bias && _bias->has_valid_id()) - { - code += R"_( -// IN_1(bia) {{bias}} -)_"; - } - code += 
R"_( -// OUT(dst, accum) {{dst}} - -TILE(uint, M0, 1, g_dst_indirect_y); - -{ -#define _IWEI_WIDTH {{WEI_WIDTH}} -#define _IWEI_HEIGHT {{WEI_HEIGHT}} -#define _ISRC_WIDTH {{SRC_WIDTH}} -#define _ISRC_HEIGHT {{SRC_HEIGHT}} -#define _ISRC_CHANNELS {{SRC_CHANNELS}} -#define _IDST_WIDTH {{DST_WIDTH}} -#define _IDST_HEIGHT {{DST_HEIGHT}} -#define _IDST_CHANNELS {{DST_CHANNELS}} -#define _IY_MULTIPLIER (_IWEI_WIDTH * _IWEI_HEIGHT) - - TILE(int, M0, 1, xi); - TILE(int, M0, 1, yi); - - // Convert the linear index to coordinate - LOOP_UNROLLING(int, i, 0, 1, M0, - { - xi[0].s[i] = ((g_ind_1 + i) % _IDST_WIDTH) * {{STRIDE_X}}; - yi[0].s[i] = ((g_ind_1 + i) / _IDST_WIDTH) * {{STRIDE_Y}}; - xi[0].s[i] -= {{PAD_LEFT}}; - yi[0].s[i] -= {{PAD_TOP}}; - }) - - LOOP_UNROLLING(int, i, 0, 1, M0, - { - {{dst}}[i].v = 0; - }) - - for(int i = 0; i < (_IWEI_WIDTH * _IWEI_HEIGHT); ++i) - { - int xk = i % _IWEI_WIDTH; - int yk = i / _IWEI_WIDTH; - - TILE(int, 1, M0, my); - - LOOP_UNROLLING(int, i, 0, 1, M0, - { - int x_s = xi[0].s[i] + xk; - int y_s = yi[0].s[i] + yk; - my[0].s[i] = x_s + y_s *_ISRC_WIDTH; - my[0].s[i] = my[0].s[i] + g_ind_2 * (int)(_ISRC_WIDTH * _ISRC_HEIGHT); - my[0].s[i] = select(-1, my[0].s[i], x_s >= 0); - my[0].s[i] = select(-1, my[0].s[i], x_s < _ISRC_WIDTH); - my[0].s[i] = select(-1, my[0].s[i], y_s >= 0); - my[0].s[i] = select(-1, my[0].s[i], y_s < _ISRC_HEIGHT); - }) - - int ck = 0; - for(; ck <= (_ISRC_CHANNELS - K0); ck += K0) - { - TILE({{SRC_DATA_TYPE}}, M0, K0, a); - TILE({{WEI_DATA_TYPE}}, N0, K0, b); - - LOOP_UNROLLING(int, i, 0, 1, M0, - { - a[i].v = {{ZERO_VALUE}}; - }) - - LOOP_UNROLLING(int, i, 0, 1, N0, - { - b[i].v = {{ZERO_VALUE}}; - }) - - T_LOAD2D_INDIRECT({{SRC_DATA_TYPE}}, M0, K0, {{SRC_TENSOR_TYPE}}, {{src}}, ck, {{src}}_stride_y, my, a); - - T_LOAD({{WEI_DATA_TYPE}}, N0, K0, {{WEI_TENSOR_TYPE}}, {{weight}}, ck, g_ind_0 * _IY_MULTIPLIER + i, _IY_MULTIPLIER, {{weight}}_stride_y, b); - - T_MMUL({{SRC_DATA_TYPE}}, {{WEI_DATA_TYPE}}, 
{{ACC_DATA_TYPE}}, M0, N0, K0, NT, T, a, b, {{dst}}); - } -)_"; - - if (leftover_loop) - { - code += R"_( - for(; ck < _ISRC_CHANNELS; ++ck) - { - TILE({{SRC_DATA_TYPE}}, M0, 1, a); - TILE({{WEI_DATA_TYPE}}, N0, 1, b); - - LOOP_UNROLLING(int, i, 0, 1, M0, - { - a[i].v = {{ZERO_VALUE}}; - }) - - LOOP_UNROLLING(int, i, 0, 1, N0, - { - b[i].v = {{ZERO_VALUE}}; - }) - - T_LOAD2D_INDIRECT({{SRC_DATA_TYPE}}, M0, 1, {{SRC_TENSOR_TYPE}}, {{src}}, ck, {{src}}_stride_y, my, a); - - T_LOAD({{WEI_DATA_TYPE}}, N0, 1, BUFFER, {{weight}}, ck, g_ind_0 * _IY_MULTIPLIER + i, _IY_MULTIPLIER, {{weight}}_stride_y, b); - - T_MMUL({{SRC_DATA_TYPE}}, {{WEI_DATA_TYPE}}, {{ACC_DATA_TYPE}}, M0, N0, 1, NT, T, a, b, {{dst}}); - } - )_"; - } - - code += R"_( -#undef _I_WEI_WIDTH -#undef _I_WEI_HEIGHT -#undef _ISRC_WIDTH -#undef _ISRC_HEIGHT -#undef _ISRC_CHANNELS -#undef _IDST_WIDTH -#undef _IDST_HEIGHT -#undef _IDST_CHANNELS -#undef _IY_MULTIPLIER - - } -)_"; - - if (_bias && _bias->has_valid_id()) - { - code += R"_( - TILE({{BIA_DATA_TYPE}}, 1, N0, bias0); - - T_LOAD({{BIA_DATA_TYPE}}, 1, N0, BUFFER, {{bias}}, g_ind_0, 0, 1, 0, bias0); - - T_ELTWISE_BROADCAST_ADD_X({{ACC_DATA_TYPE}}, M0, N0, {{dst}}, bias0, {{dst}}); - )_"; - } - - code += R"_( - LOOP_UNROLLING(int, i, 0, 1, M0, - { - g_dst_indirect_y[i].v = (uint)min(g_ind_1 + i, (int)({{DST_WIDTH}} * {{DST_HEIGHT}}) - 1); - g_dst_indirect_y[i].v += g_ind_2 * (int)({{DST_WIDTH}} * {{DST_HEIGHT}}); - }) -} -//------------------ END KERNEL {{meta_kernel_id}} --------------------- -)_"; - return code; -} - -void ClTemplateDirectConv2d::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "src"); - - const GpuKernelArgumentInfo::Type weight_type = _settings.export_to_cl_image() - ? 
GpuKernelArgumentInfo::Type::Tensor_4D_t_Image - : GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer; - vtable.declare_variable(comp_group, _weight, GpuKernelArgumentInfo(weight_type), "weight"); - - if (_bias && _bias->has_valid_id()) // optional bias - { - vtable.declare_variable(comp_group, _bias, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Vector), "bias"); - } - vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(common_tensor_type), "dst"); -} - -TagLUT ClTemplateDirectConv2d::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - TagLUT lut{}; - // Arguments and global shared variables - lut["src"] = vtable.get_variable(_src); - lut["weight"] = vtable.get_variable(_weight); - - if (_bias && _bias->has_valid_id()) // optional bias - { - lut["bias"] = vtable.get_variable(_bias); - lut["BIA_DATA_TYPE"] = get_cl_type_from_data_type(_bias->data_type()); - } - lut["dst"] = vtable.get_variable(_dst); - - const auto dst_argument = vtable.get_variable(comp_group.get_any_dst_tensor()); - lut["arg_dst"] = dst_argument.uniq_name; - - // Local build options - lut["meta_kernel_id"] = id(); - lut["ACC_DATA_TYPE"] = _src->data_type(); - lut["SRC_DATA_TYPE"] = _src->data_type(); - lut["WEI_DATA_TYPE"] = _weight->data_type(); - - lut["SRC_TENSOR_TYPE"] = "BUFFER"; - switch (vtable.get_variable(_weight).kernel_argument_info.type) - { - case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D: - case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D: - case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image: - { - lut["WEI_TENSOR_TYPE"] = "IMAGE"; - break; - } - default: - { - lut["WEI_TENSOR_TYPE"] = "BUFFER"; - break; - } - } - const auto width_idx = 1; - const auto height_idx = 2; - const auto channel_idx = 0; - - lut["SRC_WIDTH"] = _src->dimension(width_idx); - lut["SRC_HEIGHT"] = _src->dimension(height_idx); - lut["SRC_CHANNELS"] = _src->dimension(channel_idx); - - lut["WEI_WIDTH"] = 
_weight->dimension(width_idx); - lut["WEI_HEIGHT"] = _weight->dimension(height_idx); - - lut["DST_WIDTH"] = _dst->dimension(width_idx); - lut["DST_HEIGHT"] = _dst->dimension(height_idx); - lut["DST_CHANNELS"] = _dst->dimension(channel_idx); - - lut["STRIDE_X"] = _attributes.stride().x(); - lut["STRIDE_Y"] = _attributes.stride().y(); - - lut["PAD_LEFT"] = _attributes.pad().left; - lut["PAD_TOP"] = _attributes.pad().top; - - lut["ZERO_VALUE"] = 0; - - return lut; -} - -CLBuildOptions ClTemplateDirectConv2d::get_build_options(const ComponentGroup &comp_group) const -{ - const unsigned int channel_idx = get_data_layout_dimension_index(_src->data_layout(), DataLayoutDimension::CHANNEL); - - const auto root_window = comp_group.get_root_component()->template_writer()->get_window(); - const unsigned int n0 = root_window.x().step(); - const unsigned int m0 = root_window.y().step(); - const unsigned int k0 = adjust_vec_size(_settings.direct_conv_descriptor().k0, _src->dimension(channel_idx)); - const unsigned int partial_store_n0 = _dst->dimension(0) % n0; - - CLBuildOptions build_opts{}; - if (_settings.fast_relaxed_math()) - { - build_opts.add_option("-cl-fast-relaxed-math"); - } - else - { - // -cl-fast-relaxed-math also sets -cl-finite-math-only and -cl-unsafe-math-optimizations - // to disable -cl-finite-math-only, we only include -cl-unsafe-math-optimizations - build_opts.add_option("-cl-unsafe-math-optimizations"); - } - - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); - build_opts.add_option("-DK0=" + support::cpp11::to_string(k0)); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0)); - - return build_opts; -} - -std::string ClTemplateDirectConv2d::get_config_id() const -{ - const DataType data_type = _src->data_type(); - const DataLayout data_layout = _src->data_layout(); - - const unsigned int width_idx = get_data_layout_dimension_index(data_layout, 
DataLayoutDimension::WIDTH); - const unsigned int height_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::HEIGHT); - - const unsigned int kernel_size = _weight->dimension(width_idx); - - std::string config_id{}; - config_id += lower_string(string_from_data_type(data_type)); - config_id += "_"; - config_id += support::cpp11::to_string(kernel_size); - config_id += "_"; - config_id += support::cpp11::to_string(_attributes.stride().x()); - config_id += "_"; - config_id += support::cpp11::to_string(_attributes.stride().y()); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(width_idx)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(height_idx)); - config_id += "_"; - config_id += lower_string(string_from_data_layout(data_layout)); - return config_id; -} - -std::set<std::string> ClTemplateDirectConv2d::get_headers_list() const -{ - return std::set<std::string>{"helpers.h", "tile_helpers.h"}; -} - -Window ClTemplateDirectConv2d::get_window() const -{ - ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized"); - - const auto output_shape = _dst->tensor_shape(); - const auto desc = _settings.direct_conv_descriptor(); - - const unsigned int n0 = adjust_vec_size(desc.n0, output_shape[0]); - const unsigned int m0 = adjust_vec_size(desc.m0, output_shape[1] * output_shape[2]); - - // Create and configure kernel window - Window win = calculate_max_window(output_shape, Steps(n0, m0)); - - const size_t dim_y_collapsed = ceil_to_multiple(output_shape[1] * output_shape[2], m0); - win.set(Window::DimY, Window::Dimension(0, dim_y_collapsed, m0)); - win.set(Window::DimZ, Window::Dimension(0, output_shape.total_size_upper(3), 1)); - - return win; -} - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h 
b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h deleted file mode 100644 index 03c8cd2f15..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h +++ /dev/null @@ -1,116 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDIRECTCONV2D -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDIRECTCONV2D - -#include "arm_compute/core/experimental/Types.h" -#include "arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h" - -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClTemplateDirectConv2d final : public IGpuTemplateComponentWriter -{ -public: - using Attributes = ClComponentDirectConv2d::Attributes; - using Settings = ClComponentDirectConv2d::Settings; - /** Constructor - * - * Similar to @ref ClComponentDirectConv2d::validate() - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - * @param[in] attributes Component attributes - * @param[in] settings Component settings - */ - ClTemplateDirectConv2d(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes, - const Settings &settings); - /** Destructor */ - ~ClTemplateDirectConv2d() override = default; - /** Prevent instances of this class from being copy constructed */ - ClTemplateDirectConv2d(const ClTemplateDirectConv2d &direct_conv2d) = delete; - /** Prevent instances of this class from being copied */ - ClTemplateDirectConv2d &operator=(const ClTemplateDirectConv2d &direct_conv2d) = delete; - /** Allow instances of this class to be move constructed */ - ClTemplateDirectConv2d(ClTemplateDirectConv2d &&direct_conv2d) = default; - /** Allow instances of this class to be moved */ - ClTemplateDirectConv2d &operator=(ClTemplateDirectConv2d &&direct_conv2d) = default; - /** Generate kernel component name */ - std::string get_name() const override; - /** Generate kernel component 
code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ - std::string get_component_code(const ComponentGroup &comp_group) const override; - /** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the build options used in the component - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return CLBuildOptions Build options - */ - CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override; - /** Generate the component config id string used for tuning */ - std::string get_config_id() const override; - /** Generate the header list used in the component */ - std::set<std::string> get_headers_list() const override; - /** Generate the execution window for the component */ - Window get_window() const override; - -private: - const ITensorInfo *_src; - const ITensorInfo *_weight; - const ITensorInfo *_bias; - const ITensorInfo *_dst; - Attributes _attributes; - Settings _settings; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDIRECTCONV2D */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp 
deleted file mode 100644 index 78bff3c3f3..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp +++ /dev/null @@ -1,274 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "ClTemplateElementwiseBinary.h" - -#include "arm_compute/core/utils/helpers/AdjustVecSize.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/core/utils/StringUtils.h" - -#include "src/core/helpers/WindowHelpers.h" -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" -#include "support/StringSupport.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -constexpr unsigned int vector_size_byte_opencl = 16; - -ClTemplateElementwiseBinary::ClTemplateElementwiseBinary(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes) - : IGpuTemplateComponentWriter{id, tensors}, _lhs{}, _rhs{}, _dst{}, _attributes{attributes} -{ - _lhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); - _rhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_1); - _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); - ARM_COMPUTE_ERROR_ON_NULLPTR(_lhs, _rhs, _dst); -} - -std::string ClTemplateElementwiseBinary::get_name() const -{ - return "elementwise_binary"; -} - -std::string ClTemplateElementwiseBinary::get_component_code(const ComponentGroup &comp_group) const -{ - std::string code; - const bool is_root = (comp_group.get_root_component()->id() == this->id()); - const bool is_lhs_input = comp_group.is_input_tensor(_lhs); - const bool is_rhs_input = comp_group.is_input_tensor(_rhs); - - code = - R"_( - //------------------ START KERNEL {{meta_kernel_id}} {{ELTWISE_OP}} --------------------- -)_"; - - if (is_root) - { - code += - R"_( - TILE(uint, M0, 1, g_dst_indirect_y); -)_"; - } - - if (is_lhs_input) - { - code += - R"_( - TILE({{DATA_TYPE}}, {{lhs_m0}}, N0, {{lhs}}); -)_"; - } - - if (is_rhs_input) - { - code += - R"_( - TILE({{DATA_TYPE}}, {{rhs_m0}}, N0, {{rhs}}); -)_"; - } - - code += - R"_( - { -)_"; - - if (is_lhs_input) - { - code += - R"_( - 
{{lhs}}_offset_first_element_in_bytes += g_ind_2 * {{lhs}}_stride_w; - T_LOAD({{DATA_TYPE}}, {{lhs_m0}}, {{lhs_n0}}, BUFFER, {{lhs}}, {{lhs_start_ind_0}}, {{lhs_start_ind_1}}, 1, {{lhs}}_stride_y, {{lhs}}); -)_"; - } - - if (is_rhs_input) - { - code += - R"_( - {{rhs}}_offset_first_element_in_bytes += g_ind_2 * {{rhs}}_stride_w; - T_LOAD({{DATA_TYPE}}, {{rhs_m0}}, {{rhs_n0}}, BUFFER, {{rhs}}, {{rhs_start_ind_0}}, {{rhs_start_ind_1}}, 1, {{rhs}}_stride_y, {{rhs}}); -)_"; - } - - code += - R"_( - T_ELTWISE_{{BROADCAST_OP}}{{ELTWISE_OP}}({{DATA_TYPE}}, M0, N0, {{lhs}}, {{rhs}}, {{dst}}); -)_"; - - if (is_root) - { - // Calculate the destination indirect Y - code += - R"_( - LOOP_UNROLLING(int, i, 0, 1, M0, - { - g_dst_indirect_y[i].v = (uint)min(g_ind_1 + i, (int)({{arg_dst}}_w * {{arg_dst}}_h) - 1); - g_dst_indirect_y[i].v += g_ind_2 * (int)({{arg_dst}}_w * {{arg_dst}}_h); - }) -)_"; - } - - code += - R"_( - } - //------------------ END KERNEL {{meta_kernel_id}} {{ELTWISE_OP}} --------------------- -)_"; - - return code; -} - -void ClTemplateElementwiseBinary::declare_variables(GpuKernelVariableTable &vtable, - const ComponentGroup &comp_group) const -{ - vtable.declare_variable(comp_group, _lhs, GpuKernelArgumentInfo(common_tensor_type), "lhs"); - - vtable.declare_variable(comp_group, _rhs, GpuKernelArgumentInfo(common_tensor_type), "rhs"); - - vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(common_tensor_type), "dst"); -} - -TagLUT ClTemplateElementwiseBinary::get_tag_lut(const GpuKernelVariableTable &vtable, - const ComponentGroup &comp_group) const -{ - TagLUT lut{}; - - // Local build options - lut["meta_kernel_id"] = id(); - lut["DATA_TYPE"] = get_cl_type_from_data_type(_lhs->data_type()); - // Arguments and global shared variables - - lut["lhs"] = vtable.get_variable(_lhs); - lut["rhs"] = vtable.get_variable(_rhs); - lut["dst"] = vtable.get_variable(_dst); - lut["arg_dst"] = vtable.get_variable(comp_group.get_any_dst_tensor()); - - switch 
(_attributes.operation()) - { - case Attributes::ElementwiseOp::Add: - lut["ELTWISE_OP"] = "ADD"; - break; - case Attributes::ElementwiseOp::Sub: - lut["ELTWISE_OP"] = "SUB"; - break; - case Attributes::ElementwiseOp::Mul: - lut["ELTWISE_OP"] = "MUL"; - break; - default: - ARM_COMPUTE_ERROR("Arithmetic Operation not supported"); - } - - ARM_COMPUTE_ERROR_ON(comp_group.is_intermediate_tensor(_lhs) && - detail::have_different_dimensions(_lhs->tensor_shape(), _dst->tensor_shape(), 0)); - ARM_COMPUTE_ERROR_ON(comp_group.is_intermediate_tensor(_rhs) && - detail::have_different_dimensions(_rhs->tensor_shape(), _dst->tensor_shape(), 0)); - - // Set broadcast parameters - // PRE: All tensors are broadcast-compatible - const auto &lhs_dims = _lhs->tensor_shape(); - const auto &rhs_dims = _rhs->tensor_shape(); - const auto &dst_dims = _dst->tensor_shape(); - - const auto lhs_broadcast_x = dst_dims[0] != 1 && lhs_dims[0] == 1; - const auto rhs_broadcast_x = dst_dims[0] != 1 && rhs_dims[0] == 1; - const auto lhs_broadcast_y = dst_dims[1] != 1 && lhs_dims[1] == 1; - const auto rhs_broadcast_y = dst_dims[1] != 1 && rhs_dims[1] == 1; - const auto lhs_broadcast_z = dst_dims[2] != 1 && lhs_dims[2] == 1; - const auto rhs_broadcast_z = dst_dims[2] != 1 && rhs_dims[2] == 1; - - const auto lhs_broadcast_yz = lhs_broadcast_y && lhs_broadcast_z; - const auto rhs_broadcast_yz = rhs_broadcast_y && rhs_broadcast_z; - - lut["lhs_n0"] = (lhs_broadcast_x) ? "1" : "N0"; - lut["lhs_start_ind_0"] = (lhs_broadcast_x) ? "0" : "g_ind_0"; - lut["rhs_n0"] = (rhs_broadcast_x) ? "1" : "N0"; - lut["rhs_start_ind_0"] = (rhs_broadcast_x) ? "0" : "g_ind_0"; - - lut["lhs_m0"] = (lhs_broadcast_yz) ? "1" : "M0"; - lut["lhs_start_ind_1"] = (lhs_broadcast_yz) ? "0" : "g_ind_1"; - lut["rhs_m0"] = (rhs_broadcast_yz) ? "1" : "M0"; - lut["rhs_start_ind_1"] = (rhs_broadcast_yz) ? "0" : "g_ind_1"; - - lut["BROADCAST_OP"] = (lhs_broadcast_yz) ? "BROADCAST_LHS_X_" : (rhs_broadcast_yz) ? 
"BROADCAST_RHS_X_" : ""; - - return lut; -} - -CLBuildOptions ClTemplateElementwiseBinary::get_build_options(const ComponentGroup &comp_group) const -{ - CLBuildOptions build_opts{}; - /// NOTE: For now tile sizes (n0, m0) are set by the execution window. This may change in the future - const auto root_window = comp_group.get_root_component()->template_writer()->get_window(); - const unsigned int n0 = root_window.x().step(); - const unsigned int m0 = root_window.y().step(); - const unsigned int partial_store_n0 = _dst->dimension(0) % n0; - - build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(_lhs->data_type())); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0)); - - return build_opts; -} - -std::string ClTemplateElementwiseBinary::get_config_id() const -{ - std::string config_id{}; - config_id += lower_string(string_from_data_type(_dst->data_type())); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(0)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(1)); - config_id += "_"; - config_id += lower_string(string_from_data_layout(_dst->data_layout())); - - return config_id; -} - -std::set<std::string> ClTemplateElementwiseBinary::get_headers_list() const -{ - return std::set<std::string>{"helpers.h", "tile_helpers.h"}; -} - -Window ClTemplateElementwiseBinary::get_window() const -{ - ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized"); - - TensorShape output_shape = _dst->tensor_shape(); - // Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) and upper dimensions unchanged - // This is in line with the collapsing convention used by operators like Conv2d - output_shape.collapse(2U, 1U); - const unsigned int num_elems_processed_per_iteration = - 
adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0)); - Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration)); - - return win; -} - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h deleted file mode 100644 index 991c0eca44..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEELEMENTWISEBINARY -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEELEMENTWISEBINARY - -#include "arm_compute/core/experimental/Types.h" - -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClTemplateElementwiseBinary final : public IGpuTemplateComponentWriter -{ -public: - using Attributes = ClComponentElementwiseBinary::Attributes; - - /** Constructor - * - * Similar to @ref ClComponentElementwiseBinary::validate() - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - * @param[in] attributes Component attributes - */ - ClTemplateElementwiseBinary(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes); - /** Prevent instances of this class from being copy constructed */ - ClTemplateElementwiseBinary(const ClTemplateElementwiseBinary &elementwise) = delete; - /** Prevent instances of this class from being copied */ - ClTemplateElementwiseBinary &operator=(const ClTemplateElementwiseBinary &elementwise) = delete; - /** Allow instances of this class to be move constructed */ - ClTemplateElementwiseBinary(ClTemplateElementwiseBinary &&elementwise) = default; - /** Allow instances of this class to be moved */ - ClTemplateElementwiseBinary &operator=(ClTemplateElementwiseBinary &&elementwise) = default; - - /** Generate kernel component name */ - std::string get_name() const override; - - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ - std::string get_component_code(const ComponentGroup 
&comp_group) const override; - - /** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - - /** Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - - /** Generate the build options used in the component - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return CLBuildOptions Build options - */ - CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override; - - /** Generate the component config id string used for tuning */ - std::string get_config_id() const override; - - /** Generate the header list used in the component */ - std::set<std::string> get_headers_list() const override; - - /** Generate the execution window for the component */ - Window get_window() const override; - -private: - const ITensorInfo *_lhs; - const ITensorInfo *_rhs; - const ITensorInfo *_dst; - Attributes _attributes; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEELEMENTWISEBINARY */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp deleted file mode 100644 index 522c33a022..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp +++ /dev/null @@ -1,267 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h" - -#include "arm_compute/core/utils/helpers/AdjustVecSize.h" -#include "arm_compute/core/utils/StringUtils.h" - -#include "src/core/helpers/WindowHelpers.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" -#include "support/StringSupport.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -namespace -{ -constexpr unsigned int serial_vector_size = 8; -} // namespace -ClTemplateLogits1DMaxShiftExpSum::ClTemplateLogits1DMaxShiftExpSum(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes) - : IGpuTemplateComponentWriter{id, tensors}, _src{}, _sum{}, _dst{}, _attributes{attributes} -{ - _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); - _sum = this->tensors().get_const_tensor(TensorType::ACL_DST_0); - _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_1); - ARM_COMPUTE_ERROR_ON_NULLPTR(_src); - ARM_COMPUTE_ERROR_ON_NULLPTR(_sum); - ARM_COMPUTE_ERROR_ON_NULLPTR(_dst); -} - -std::string ClTemplateLogits1DMaxShiftExpSum::get_name() const -{ - return "logits_1d_max_shift_exp_sum"; -} - -std::string ClTemplateLogits1DMaxShiftExpSum::get_component_code(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - std::string code = R"_( -//------------------ START KERNEL {{meta_kernel_id}} --------------------- -#define VEC_TYPE VEC_DATA_TYPE({{DATA_TYPE}}, N0) -#define SELECT_TYPE SELECT_VEC_DATA_TYPE({{DATA_TYPE}}, N0) -{ - __global uchar *src_addr = {{src}}_ptr + {{src}}_offset_first_element_in_bytes + g_ind_1 * {{src}}_stride_y + g_ind_2 * {{src}}_stride_z; - __global uchar *dst_addr = {{dst}}_ptr + {{dst}}_offset_first_element_in_bytes + g_ind_1 * {{dst}}_stride_y + g_ind_2 * {{dst}}_stride_z; - Image sum = CONVERT_TENSOR3D_TO_IMAGE_STRUCT({{sum}}); - VEC_TYPE max_val_vec = (VEC_TYPE)({{MINVAL}}); -)_"; - - const bool beta_defined = 
(_attributes.beta() != 1.f); - - if (beta_defined) - { - code += R"_( - VEC_TYPE beta = (VEC_TYPE){{BETA}}; -)_"; - } - - constexpr unsigned int _serial_vector_size = 8; - const unsigned int reduction_dim_size = _src->dimension(0); - const unsigned int vector_size = adjust_vec_size(_serial_vector_size, reduction_dim_size); - const bool non_multiple_of_n0 = ((reduction_dim_size % vector_size) != 0); - - if (non_multiple_of_n0) - { - code += R"_( - VEC_TYPE data = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)src_addr); - SELECT_TYPE widx = (SELECT_TYPE)PARTIAL_N0 > VEC_OFFS(SELECT_DATA_TYPE({{DATA_TYPE}}), N0); - max_val_vec = max(max_val_vec, select((VEC_TYPE)({{MINVAL}}), data, widx)); -)_"; - } - - code += R"_( - for(uint i = PARTIAL_N0; i < {{SRC_WIDTH}}; i += N0) - { - VEC_TYPE data = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(src_addr + i * sizeof({{DATA_TYPE}}))); - max_val_vec = max(data, max_val_vec); - } - - {{DATA_TYPE}} max_val = MAX_REDUCE(max_val_vec, N0); - VEC_TYPE sum1D = 0; -)_"; - - if (non_multiple_of_n0) - { - code += R"_( - data -= max_val; -)_"; - if (beta_defined) - { - code += R"_( - data *= beta; -)_"; - } - - if (_attributes.is_log_softmax()) - { - code += R"_( - VSTORE_PARTIAL(N0, PARTIAL_N0) - (data, 0, (__global {{DATA_TYPE}} *)dst_addr); - data = exp(data); - data = select(0, data, widx); -)_"; - } - else - { - code += R"_( - data = exp(data); - data = select(0, data, widx); - VSTORE_PARTIAL(N0, PARTIAL_N0) - (data, 0, (__global {{DATA_TYPE}} *)dst_addr); -)_"; - } - - code += R"_( - sum1D += data; -)_"; - } - code += R"_( - for(uint i = PARTIAL_N0; i < {{SRC_WIDTH}}; i += N0) - { - VEC_TYPE data = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(src_addr + i * sizeof({{DATA_TYPE}}))); - data -= max_val; -)_"; - - if (beta_defined) - { - code += R"_( - data *= beta; -)_"; - } - - if (_attributes.is_log_softmax()) - { - code += R"_( - VSTORE(N0) - (data, 0, (__global {{DATA_TYPE}} *)(dst_addr + i * sizeof({{DATA_TYPE}}))); - data = exp(data); -)_"; - } - 
else - { - code += R"_( - data = exp(data); - VSTORE(N0) - (data, 0, (__global {{DATA_TYPE}} *)(dst_addr + i * sizeof({{DATA_TYPE}}))); -)_"; - } - - code += R"_( - sum1D += data; - } -)_"; - - code += R"_( - *((__global {{DATA_TYPE}} *)sum.ptr) = SUM_REDUCE(sum1D, N0); -} -//------------------ END KERNEL {{meta_kernel_id}} --------------------- -)_"; - - return code; -} - -void ClTemplateLogits1DMaxShiftExpSum::declare_variables(GpuKernelVariableTable &vtable, - const ComponentGroup &comp_group) const -{ - vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "src"); - - vtable.declare_variable(comp_group, _sum, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "sum"); - - vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "dst"); -} - -TagLUT ClTemplateLogits1DMaxShiftExpSum::get_tag_lut(const GpuKernelVariableTable &vtable, - const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - TagLUT lut{}; - - // Arguments and global shared variables - lut["src"] = vtable.get_variable(_src); - lut["sum"] = vtable.get_variable(_sum); - lut["dst"] = vtable.get_variable(_dst); - - // Local build options - lut["meta_kernel_id"] = id(); - - const DataType data_type = _src->data_type(); - - lut["DATA_TYPE"] = get_cl_type_from_data_type(data_type); - lut["BETA"] = float_to_string_with_full_precision(_attributes.beta()); - lut["MINVAL"] = (data_type == DataType::F16) ? 
std::string("-HALF_MAX") : std::string("-FLT_MAX"); - lut["SRC_WIDTH"] = support::cpp11::to_string(_src->dimension(0)); - - return lut; -} - -CLBuildOptions ClTemplateLogits1DMaxShiftExpSum::get_build_options(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - CLBuildOptions build_opts{}; - - const unsigned int reduction_dim_size = _src->dimension(0); - const unsigned int vector_size = adjust_vec_size(serial_vector_size, reduction_dim_size); - - build_opts.add_option("-DN0=" + support::cpp11::to_string(vector_size)); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string((reduction_dim_size % vector_size))); - - return build_opts; -} - -std::string ClTemplateLogits1DMaxShiftExpSum::get_config_id() const -{ - std::string config_id = get_name(); - - config_id += "_"; - config_id += support::cpp11::to_string(_src->dimension(0)); - config_id += "_"; - config_id += string_from_data_type(_src->data_type()); - - return config_id; -} - -std::set<std::string> ClTemplateLogits1DMaxShiftExpSum::get_headers_list() const -{ - return std::set<std::string>{"helpers.h", "tile_helpers.h"}; -} - -Window ClTemplateLogits1DMaxShiftExpSum::get_window() const -{ - ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized"); - - Window win = calculate_max_window(*_dst, Steps(_src->dimension(0))); - return win.collapse(win, Window::DimZ); -} - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h deleted file mode 100644 index ac9ddaa9d4..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATELOGITS1DMAXSHIFTEXPSUM -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATELOGITS1DMAXSHIFTEXPSUM - -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClTemplateLogits1DMaxShiftExpSum final : public IGpuTemplateComponentWriter -{ -public: - using Attributes = ClComponentLogits1DMaxShiftExpSum::Attributes; - - /** Constructor - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - * @param[in] attributes Component attributes - */ - ClTemplateLogits1DMaxShiftExpSum(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes); - /** Prevent instances of this class from being copy constructed */ - ClTemplateLogits1DMaxShiftExpSum(const ClTemplateLogits1DMaxShiftExpSum &) = delete; - /** Prevent instances of this class from being copied */ - ClTemplateLogits1DMaxShiftExpSum &operator=(const ClTemplateLogits1DMaxShiftExpSum &) = delete; - /** Allow instances of this class to be move constructed */ - ClTemplateLogits1DMaxShiftExpSum(ClTemplateLogits1DMaxShiftExpSum &&) = default; - /** Allow instances of this class to be moved */ - ClTemplateLogits1DMaxShiftExpSum &operator=(ClTemplateLogits1DMaxShiftExpSum &&) = default; - /** Generate kernel component name */ - std::string get_name() const override; - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ - std::string get_component_code(const ComponentGroup &comp_group) const override; - /** Declare all variables used by the component in the @p 
vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the build options used in the component - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return CLBuildOptions Build options - */ - CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override; - /** Generate the component config id string used for tuning */ - std::string get_config_id() const override; - /** Generate the header list used in the component */ - std::set<std::string> get_headers_list() const override; - /** Generate the execution window for the component */ - Window get_window() const override; - -private: - const ITensorInfo *_src; // input - const ITensorInfo *_sum; // exponentiated and summed input - const ITensorInfo *_dst; // exponentiated input - Attributes _attributes; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute - -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATELOGITS1DMAXSHIFTEXPSUM */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp deleted file mode 100644 index 7d7c3e6673..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp +++ /dev/null @@ -1,171 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.h" - -#include "arm_compute/core/utils/helpers/AdjustVecSize.h" - -#include "src/core/helpers/WindowHelpers.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" -#include "support/StringSupport.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -ClTemplateLogits1DNorm::ClTemplateLogits1DNorm(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes) - : IGpuTemplateComponentWriter{id, tensors}, _src{}, _sum{}, _dst{}, _attributes{attributes} -{ - _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); - _sum = this->tensors().get_const_tensor(TensorType::ACL_SRC_1); - _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); - ARM_COMPUTE_ERROR_ON_NULLPTR(_src); - ARM_COMPUTE_ERROR_ON_NULLPTR(_sum); - ARM_COMPUTE_ERROR_ON_NULLPTR(_dst); -} - -std::string ClTemplateLogits1DNorm::get_name() const -{ - return "logits_1d_norm"; -} - -std::string ClTemplateLogits1DNorm::get_component_code(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - std::string code = R"_( -//------------------ START KERNEL {{meta_kernel_id}} --------------------- -{ - const int x_offs = g_ind_0 * sizeof({{DATA_TYPE}}); - __global uchar *src_addr = {{src}}_ptr + {{src}}_offset_first_element_in_bytes + x_offs + g_ind_1 * {{src}}_stride_y + g_ind_2 * {{src}}_stride_z; - __global uchar *dst_addr = {{dst}}_ptr + {{dst}}_offset_first_element_in_bytes + x_offs + g_ind_1 * {{dst}}_stride_y + g_ind_2 * {{dst}}_stride_z; - Image sum = CONVERT_TENSOR3D_TO_IMAGE_STRUCT_NO_STEP({{sum}}); -)_"; - // Load max value of 1D logits vector (row) - code += R"_( - {{DATA_TYPE}} sum_val = *((__global {{DATA_TYPE}} *)offset(&sum, 0, g_ind_1)); - VEC_DATA_TYPE({{DATA_TYPE}}, N0) - data0 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)src_addr); -)_"; - - if (_attributes.is_log_softmax()) - { - code += R"_( - 
sum_val = log(sum_val); - data0 -= sum_val; -)_"; - } - else - { - code += R"_( - data0 /= sum_val; -)_"; - } - - code += R"_( - STORE_VECTOR_SELECT(data, {{DATA_TYPE}}, dst_addr, N0, PARTIAL_N0, PARTIAL_N0 != 0 && g_ind_0 == 0); -} -//------------------ END KERNEL {{meta_kernel_id}} --------------------- -)_"; - - return code; -} - -void ClTemplateLogits1DNorm::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "src"); - - vtable.declare_variable(comp_group, _sum, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "sum"); - - vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "dst"); -} - -TagLUT ClTemplateLogits1DNorm::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - TagLUT lut{}; - - // Arguments and global shared variables - lut["src"] = vtable.get_variable(_src); - lut["sum"] = vtable.get_variable(_sum); - lut["dst"] = vtable.get_variable(_dst); - - // Local build options - lut["meta_kernel_id"] = id(); - - const DataType data_type = _src->data_type(); - - lut["DATA_TYPE"] = get_cl_type_from_data_type(data_type); - - return lut; -} - -CLBuildOptions ClTemplateLogits1DNorm::get_build_options(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - CLBuildOptions build_opts{}; - - const auto root_window = comp_group.get_root_component()->template_writer()->get_window(); - const unsigned int n0 = root_window.x().step(); - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string((_src->dimension(0) % n0))); - - return build_opts; -} - -std::string ClTemplateLogits1DNorm::get_config_id() const -{ - std::string config_id = get_name(); - - config_id += "_"; - config_id += 
support::cpp11::to_string(_src->dimension(0)); - config_id += "_"; - config_id += string_from_data_type(_src->data_type()); - - return config_id; -} - -std::set<std::string> ClTemplateLogits1DNorm::get_headers_list() const -{ - return std::set<std::string>{"helpers.h", "tile_helpers.h"}; -} - -Window ClTemplateLogits1DNorm::get_window() const -{ - ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized"); - constexpr unsigned int serial_vector_size = 16; - const unsigned int vector_size = adjust_vec_size(serial_vector_size, _src->dimension(0)); - - Window win = calculate_max_window(*_src, Steps(vector_size)); - return win.collapse(win, Window::DimZ); -} - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.h deleted file mode 100644 index 5a74be5842..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.h +++ /dev/null @@ -1,106 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATELOGITS1DNORM -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATELOGITS1DNORM - -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClTemplateLogits1DNorm final : public IGpuTemplateComponentWriter -{ -public: - using Attributes = ClComponentLogits1DNorm::Attributes; - - /** Constructor - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - * @param[in] attributes Component attributes - */ - ClTemplateLogits1DNorm(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes); - /** Prevent instances of this class from being copy constructed */ - ClTemplateLogits1DNorm(const ClTemplateLogits1DNorm &) = delete; - /** Prevent instances of this class from being copied */ - ClTemplateLogits1DNorm &operator=(const ClTemplateLogits1DNorm &) = delete; - /** Allow instances of this class to be move constructed */ - ClTemplateLogits1DNorm(ClTemplateLogits1DNorm &&) = default; - /** Allow instances of this class to be moved */ - ClTemplateLogits1DNorm &operator=(ClTemplateLogits1DNorm &&) = default; - /** Generate kernel component name */ - std::string 
get_name() const override; - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ - std::string get_component_code(const ComponentGroup &comp_group) const override; - /** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the build options used in the component - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return CLBuildOptions Build options - */ - CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override; - /** Generate the component config id string used for tuning */ - std::string get_config_id() const override; - /** Generate the header list used in the component */ - std::set<std::string> get_headers_list() const override; - /** Generate the execution window for the component */ - Window get_window() const override; - -private: - const ITensorInfo *_src; // exponentiated input - const ITensorInfo *_sum; // exponentiated and summed input - const ITensorInfo *_dst; // normalization of input with _sum - - Attributes _attributes; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute - -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATELOGITS1DNORM */ diff --git 
a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp deleted file mode 100644 index 8936db6abe..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp +++ /dev/null @@ -1,470 +0,0 @@ -/* - * Copyright (c) 2023-2024 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "ClTemplatePool2d.h" - -#include "arm_compute/core/utils/helpers/AdjustVecSize.h" -#include "arm_compute/core/utils/misc/ShapeCalculator.h" -#include "arm_compute/core/utils/StringUtils.h" - -#include "src/core/helpers/WindowHelpers.h" -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" -#include "support/StringSupport.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -namespace -{ -// Shape indexes for NHWC Datalayout -constexpr static int32_t height_idx = 2; -constexpr static int32_t width_idx = 1; -constexpr static int32_t channel_idx = 0; -} // namespace -ClTemplatePool2d::ClTemplatePool2d(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes, - const Settings &settings) - : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}, _attributes{attributes}, _settings{settings} -{ - _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); - _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); - ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst); -} - -std::string ClTemplatePool2d::get_name() const -{ - return "pool2d"; -} - -std::string ClTemplatePool2d::get_component_code(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - // Condition to use 2x2 optimized kernel - if (_attributes.pool_size() == Size2D(2, 2)) - { - return get_2x2_kernel_code(); - } - else - { - return get_MxN_kernel_code(); - } -} - -std::string ClTemplatePool2d::get_MxN_kernel_code() const -{ - const auto pool_type = _attributes.pool_type(); - const bool fp_mixed_precision = (_src->data_type() == DataType::F16) && pool_type != PoolingType::MAX; - - // Define pool op macro. - std::string pool_op = (pool_type == PoolingType::AVG) ? 
R"_(#define POOL_OP(x,y) ((x) + (y)))_" - : R"_(#define POOL_OP(x,y) (fmax((x), (y))) )_"; - - // Kernel start - // Note: If C is not multiple of N0, we shift back of PARTIAL_N0 elements to compute the leftover elements for get_global_id(0) == 0 - // Note: If C is less than N0, N0 should be SHRINKED to the closest smaller N0. This operation is performed on the host side - std::string code = R"_( -//------------------ START KERNEL {{meta_kernel_id}} --------------------- -// IN_0(src) {{src}} -// OUT(dst, accum) {{dst}} - -{ - const int idx_out_c = g_ind_0; - const int idx_out_w = g_ind_1; -)_"; - - // Add macro for POOL_OP - code += "\n" + pool_op + "\n"; - - code += R"_( - const int idx_out_h = g_ind_2 % {{DST_HEIGHT}}; - const int idx_out_n = g_ind_2 / {{DST_HEIGHT}}; -)_"; - - // Define common variables. - code += R"_( - __global unsigned char *in_base_ptr = {{src}}_ptr + {{src}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_n * {{src}}_stride_w; - - __global unsigned char *out_base_ptr = {{dst}}_ptr + {{dst}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_w * {{dst}}_stride_y + idx_out_h * {{dst}}_stride_z + idx_out_n * {{dst}}_stride_w; - - VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0) - res0 = {{INITIAL_VALUE}}; - - const int idx_in_w = idx_out_w * {{STRIDE_X}} - {{PAD_X}}; - const int idx_in_h = idx_out_h * {{STRIDE_Y}} - {{PAD_Y}}; - - const int pool_x_s = max((int)0, -idx_in_w); - const int pool_x_e = min((int){{POOL_SIZE_X}}, (int){{SRC_WIDTH}} - idx_in_w); - const int pool_y_s = max((int)0, -idx_in_h); - const int pool_y_e = min((int){{POOL_SIZE_Y}}, (int){{SRC_HEIGHT}} - idx_in_h); -)_"; - - // Determine filter size depending on if padding is excluded or not - if (_attributes.exclude_padding()) - { - code += R"_( - const int filter_size = (pool_y_e - pool_y_s) * (pool_x_e - pool_x_s); -)_"; - } - else - { - code += R"_( - const int filter_size = {{POOL_SIZE_X}} * {{POOL_SIZE_Y}}; -)_"; - } - - // Loop 
through pool size - // if global pooling - if (_attributes.pool_size().x() == _src->dimension(width_idx) && - _attributes.pool_size().y() == _src->dimension(height_idx)) - { - // Begin loop - code += R"_( - // Global pooling path - for(int y = 0; y < {{POOL_SIZE_Y}}; ++y) - { - #pragma unroll 8 - for(int x = 0; x < {{POOL_SIZE_X}}; ++x) - { - VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0) - data0; -)_"; - } - else // if local pooling size - { - code += R"_( - for(int y = pool_y_s; y < pool_y_e; ++y) - { - #pragma unroll 8 - for(int x = pool_x_s; x < pool_x_e; ++x) - { - VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0) - data0; -)_"; - } // end else - - // if condition inside loop - use 32bit acc if mixed_precision. - // End loop through pooling section. - if (fp_mixed_precision) - { - // In case of FP_MIXED_PRECISION, ACC_DATA_TYPE is != DATA_TYPE - code += R"_( - data0 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + (x + idx_in_w) * {{src}}_stride_y + (y + idx_in_h) * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)); - res0 = POOL_OP(res0, data0); - } - } -)_"; - } - else // load data, compute result and end loop - { - code += R"_( - data0 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + (x + idx_in_w) * {{src}}_stride_y + (y + idx_in_h) * {{src}}_stride_z)); - res0 = POOL_OP(res0, data0); - } - } -)_"; - } - - // For Pool AVG ONLY, divide pool output by filter size - if (pool_type == PoolingType::AVG) - { - code += R"_( - res0 /= (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0))filter_size; -)_"; - } - - // If mixed precision convert datatype before storing. Then end kernel. 
- if (fp_mixed_precision) - { - code += R"_( - VEC_DATA_TYPE({{DATA_TYPE}}, N0) - res_converted0 = CONVERT(res0, VEC_DATA_TYPE({{DATA_TYPE}}, N0)); - STORE_VECTOR_SELECT(res_converted, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0); -)_"; - } - else - { - // Store data - code += R"_( - STORE_VECTOR_SELECT(res, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0); -)_"; - } - - code += R"_( -//------------------ END KERNEL {{meta_kernel_id}} --------------------- -} -)_"; - - return code; -} - -std::string ClTemplatePool2d::get_2x2_kernel_code() const -{ - const auto pool_type = _attributes.pool_type(); - const bool fp_mixed_precision = (_src->data_type() == DataType::F16) && pool_type != PoolingType::MAX; - std::string pool_op = (pool_type == PoolingType::AVG) ? R"_(#define POOL_OP(x,y) ((x) + (y)))_" - : R"_(#define POOL_OP(x,y) (fmax((x), (y))) )_"; - - std::string code = R"_( -//------------------ START KERNEL {{meta_kernel_id}} --------------------- -// IN_0(src) {{src}} -// OUT(dst, accum) {{dst}} - -#define SELECT_TYPE SELECT_VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0) - -{ - const int idx_out_c = g_ind_0; - const int idx_out_w = g_ind_1; -)_"; - - // Add pool op macro - code += "\n" + pool_op + "\n"; - - // If batch size != 1, the batch size dimension is collapsed over the height dimension - code += R"_( - const int idx_out_h = g_ind_2 % {{DST_HEIGHT}}; - const int idx_out_n = g_ind_2 / {{DST_HEIGHT}}; -)_"; - - code += R"_( - const int idx_in_w = idx_out_w * {{STRIDE_X}} - {{PAD_X}}; - const int idx_in_h = idx_out_h * {{STRIDE_Y}} - {{PAD_Y}}; - - __global unsigned char *in_base_ptr = {{src}}_ptr + {{src}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_n * {{src}}_stride_w; - __global unsigned char *out_base_ptr = {{dst}}_ptr + {{dst}}_offset_first_element_in_bytes + idx_out_c * sizeof({{DATA_TYPE}}) + idx_out_w * {{dst}}_stride_y + idx_out_h * {{dst}}_stride_z + idx_out_n * - 
{{dst}}_stride_w; - const int pool_x_s = max((int)0, -idx_in_w); - const int pool_x_e = min((int)2, (int){{SRC_WIDTH}} - idx_in_w); - const int pool_y_s = max((int)0, -idx_in_h); - const int pool_y_e = min((int)2, (int){{SRC_HEIGHT}} - idx_in_h); - - const int filter_size = (pool_x_e - pool_x_s) * (pool_y_e - pool_y_s); - const int x0 = pool_x_s + idx_in_w; - const int y0 = pool_y_s + idx_in_h; - const int x1 = pool_x_e - 1 + idx_in_w; - const int y1 = pool_y_e - 1 + idx_in_h; - - REPEAT_VAR_INIT_TO_CONST(4, VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0), data, 0); -)_"; - - if (fp_mixed_precision) - { - // In case of FP_MIXED_PRECISION, ACC_DATA_TYPE is != DATA_TYPE - code += R"_( - data0 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y0 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)); - data1 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y0 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)); - data2 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y1 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)); - data3 = CONVERT(VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y1 * {{src}}_stride_z)), VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)); -)_"; - } - else - { - code += R"_( - data0 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y0 * {{src}}_stride_z)); - data1 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y0 * {{src}}_stride_z)); - data2 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x0 * {{src}}_stride_y + y1 * {{src}}_stride_z)); - data3 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)(in_base_ptr + x1 * {{src}}_stride_y + y1 * {{src}}_stride_z)); -)_"; - } - - if (pool_type != PoolingType::MAX) - { - // Make invalid the values loaded if the x or y coordinate was clamped (out-of-bound) - code += R"_( - if(filter_size != 4) - { - 
SELECT_TYPE cond_w_s = (SELECT_TYPE)idx_in_w < (SELECT_TYPE)0; - SELECT_TYPE cond_w_e = (SELECT_TYPE)idx_in_w >= (SELECT_TYPE)({{SRC_WIDTH}} - 1); - SELECT_TYPE cond_h_s = (SELECT_TYPE)idx_in_h < (SELECT_TYPE)0; - SELECT_TYPE cond_h_e = (SELECT_TYPE)idx_in_h >= (SELECT_TYPE)({{SRC_HEIGHT}} - 1); - - data0 = select(data0, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_s | cond_h_s)); - data1 = select(data1, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_e | cond_h_s)); - data2 = select(data2, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_s | cond_h_e)); - data3 = select(data3, (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0)){{INITIAL_VALUE}}, (SELECT_TYPE)(cond_w_e | cond_h_e)); - } -)_"; - } - - code += R"_( - VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0) - res0 = data0; - res0 = POOL_OP(res0, data1); - res0 = POOL_OP(res0, data2); - res0 = POOL_OP(res0, data3); -)_"; - - if (pool_type == PoolingType::AVG) - { - // If avg pooling divide result accordingly. 
- if (_attributes.exclude_padding()) - { - code += R"_( - res0 /= (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0))filter_size; -)_"; - } - else - { - code += R"_( - res0 /= (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0))4; -)_"; - } - } - - // Store result - if (fp_mixed_precision) - { - code += R"_( - VEC_DATA_TYPE({{DATA_TYPE}}, N0) - res_converted0 = CONVERT(res0, VEC_DATA_TYPE({{DATA_TYPE}}, N0)); - STORE_VECTOR_SELECT(res_converted, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0); -)_"; - } - else - { - code += R"_( - STORE_VECTOR_SELECT(res, {{DATA_TYPE}}, out_base_ptr, N0, PARTIAL_N0, (PARTIAL_N0 != 0) && g_ind_0 == 0); -)_"; - } - - code += R"_( - //------------------ END KERNEL {{meta_kernel_id}} --------------------- -} -#undef SELECT_TYPE -)_"; - - return code; -} - -void ClTemplatePool2d::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "src"); - - vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "dst"); -} - -TagLUT ClTemplatePool2d::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - TagLUT lut{}; - // Arguments and global shared variables - lut["src"] = vtable.get_variable(_src); - lut["dst"] = vtable.get_variable(_dst); - - // Local build options - lut["meta_kernel_id"] = id(); - - // Retrieve relevant data - const auto padding = _attributes.pad(); - const auto stride = _attributes.stride(); - const auto pool_size = _attributes.pool_size(); - const auto data_type = _src->data_type(); - const auto use_fp_mixed_precision = - (_src->data_type() == DataType::F16) && _attributes.pool_type() != PoolingType::MAX; - const std::string max_initial_value = - _settings.use_inf_as_limit() ? 
"(-INFINITY)" - : float_to_string_with_full_precision(std::numeric_limits<float>::lowest()); - - // pool specific - lut["STRIDE_X"] = stride.x(); - lut["STRIDE_Y"] = stride.y(); - lut["PAD_X"] = padding.left; - lut["PAD_Y"] = padding.top; - lut["POOL_SIZE_X"] = pool_size.width; - lut["POOL_SIZE_Y"] = pool_size.height; - - // Datatypes and variables - lut["ACC_DATA_TYPE"] = get_cl_type_from_data_type( - (use_fp_mixed_precision) ? (DataType::F32) : (data_type)); // Type of accumulators to use. - lut["DATA_TYPE"] = get_cl_type_from_data_type(data_type); - lut["SRC_WIDTH"] = _src->dimension(width_idx); - lut["SRC_HEIGHT"] = _src->dimension(height_idx); - lut["INITIAL_VALUE"] = (_attributes.pool_type() == PoolingType::MAX) ? max_initial_value : std::string("0"); - - // Tensor specific data - lut["DST_HEIGHT"] = _dst->dimension(height_idx); - - return lut; -} - -CLBuildOptions ClTemplatePool2d::get_build_options(const ComponentGroup &comp_group) const -{ - const auto root_window = comp_group.get_root_component()->template_writer()->get_window(); - const unsigned int n0 = root_window.x().step(); - const unsigned int partial_store_n0 = _dst->dimension(0) % n0; - - CLBuildOptions build_opts{}; - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0)); - - return build_opts; -} - -std::string ClTemplatePool2d::get_config_id() const -{ - const DataType data_type = _src->data_type(); - const DataLayout data_layout = _src->data_layout(); - - std::string config_id{}; - config_id += "pooling_layer_2d_"; - config_id += lower_string(string_from_data_type(data_type)); - config_id += "_"; - config_id += lower_string(string_from_data_layout(data_layout)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(width_idx)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(height_idx)); - config_id += "_"; - config_id += 
support::cpp11::to_string(_dst->dimension(channel_idx)); - - return config_id; -} - -std::set<std::string> ClTemplatePool2d::get_headers_list() const -{ - return std::set<std::string>{"helpers.h", "tile_helpers.h", "repeat.h"}; -} - -Window ClTemplatePool2d::get_window() const -{ - ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized"); - const auto output_shape = _dst->tensor_shape(); - const unsigned int vec_size = adjust_vec_size(((_dst->data_type() == DataType::F32) ? 2 : 4), _dst->dimension(0)); - - // Create and configure kernel window - auto win = calculate_max_window(output_shape, Steps(vec_size)); - win = win.collapse_if_possible(win, Window::DimZ); // collapse window on batch size. - return win; -} - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h deleted file mode 100644 index d1d3c01669..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h +++ /dev/null @@ -1,132 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEPOOL2D -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEPOOL2D - -#include "arm_compute/core/experimental/Types.h" -#include "arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h" -#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h" - -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClTemplatePool2d final : public IGpuTemplateComponentWriter -{ -public: - using Attributes = ClComponentPool2d::Attributes; - using Settings = ClComponentPool2d::Settings; - /** Constructor - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - * @param[in] attributes Component attributes - * @param[in] settings Component settings - */ - ClTemplatePool2d(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes, - const Settings &settings); - - /** Prevent instances of this class from being copy constructed */ - ClTemplatePool2d(const ClTemplatePool2d &direct_conv2d) = delete; - - /** Prevent instances of this class from being copied */ - ClTemplatePool2d &operator=(const ClTemplatePool2d &direct_conv2d) = delete; - - /** Allow 
instances of this class to be move constructed */ - ClTemplatePool2d(ClTemplatePool2d &&direct_conv2d) = default; - - /** Allow instances of this class to be moved */ - ClTemplatePool2d &operator=(ClTemplatePool2d &&direct_conv2d) = default; - - /** Generate kernel component name */ - std::string get_name() const override; - - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ - std::string get_component_code(const ComponentGroup &comp_group) const override; - /** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the build options used in the component - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return CLBuildOptions Build options - */ - CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override; - - /** Generate the component config id string used for tuning */ - std::string get_config_id() const override; - - /** Generate the header list used in the component */ - std::set<std::string> get_headers_list() const override; - - /** Generate the execution window for the component */ - Window get_window() const override; - -private: - /** Generate pooling kernel template code optimized for 2x2 pooling - * - * @return std::String Component code - */ - std::string get_2x2_kernel_code() 
const; - - /** Generate generalised pooling kernel template code for MxN pooling - * - * @return std::String Component code - */ - std::string get_MxN_kernel_code() const; - - const ITensorInfo *_src; - const ITensorInfo *_dst; - Attributes _attributes; - Settings _settings; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEPOOL2D */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp deleted file mode 100644 index c882353fcb..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp +++ /dev/null @@ -1,161 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "ClTemplateReshape.h" - -#include "arm_compute/core/utils/helpers/AdjustVecSize.h" -#include "arm_compute/core/utils/StringUtils.h" - -#include "src/core/helpers/WindowHelpers.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -constexpr unsigned int vector_size_byte_opencl = 16; - -ClTemplateReshape::ClTemplateReshape(ComponentId id, const ArgumentPack<ITensorInfo> &tensors) - : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{} -{ - _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); - _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); - ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst); -} - -std::string ClTemplateReshape::get_name() const -{ - return "reshape"; -} - -std::string ClTemplateReshape::get_component_code(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - std::string code; - - code = R"_( -//------------------ START KERNEL {{meta_kernel_id}} --------------------- - -// IN(src) {{src}} -// OUT(dst, accum) {{dst}} - -TILE(uint, M0, 1, g_dst_indirect_y); -{ - __global uchar * base_src_ptr = {{src}}_ptr + {{src}}_offset_first_element_in_bytes; - const int tile_vertical_idx = g_ind_1 * {{arg_dst}}_c + g_ind_2 * {{arg_dst}}_c * {{arg_dst}}_w; - LOOP_UNROLLING(int, _m0, 0, 1, M0, - { - const int row_idx = _m0 * {{arg_dst}}_c + tile_vertical_idx; - const int tile_horizontal_idx = g_ind_0 + row_idx; - LOOP_UNROLLING(int, _n0, 0, 1, N0, - { - {{src}}_ptr = base_src_ptr; - const int linear_idx = tile_horizontal_idx + _n0; - const int in_id_x = linear_idx % {{src}}_c; - const int in_id_y = (linear_idx / {{src}}_c) % {{src}}_w; - const int in_id_z = linear_idx / ({{src}}_c * {{src}}_w); - {{src}}_ptr += in_id_x * sizeof({{DATA_TYPE}}) + in_id_y * {{src}}_stride_y + in_id_z * {{src}}_stride_z; - {{dst}}[_m0].s[_n0] = *((__global {{DATA_TYPE}} *){{src}}_ptr); - }) - }) - - LOOP_UNROLLING(int, i, 0, 
1, M0, - { - g_dst_indirect_y[i].v = (uint)min((int)(g_ind_1 + i), (int)({{arg_dst}}_w) - 1); - g_dst_indirect_y[i].v += (int)(g_ind_2 % {{arg_dst}}_h) * (int)({{arg_dst}}_w); - g_dst_indirect_y[i].v += (int)(g_ind_2 / {{arg_dst}}_h) * (int)({{arg_dst}}_w * {{arg_dst}}_h); - }) -} -//------------------ END KERNEL {{meta_kernel_id}} --------------------- -)_"; - return code; -} - -void ClTemplateReshape::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - vtable.declare_variable(comp_group, _src, - GpuKernelArgumentInfo(common_tensor_type), // GpuKernelArgumentInfo::Type::Image_3D - "src"); - - vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(common_tensor_type), "dst"); -} - -TagLUT ClTemplateReshape::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - TagLUT lut{}; - - // Arguments and global shared variables - lut["src"] = vtable.get_variable(_src); - lut["dst"] = vtable.get_variable(_dst); - lut["arg_dst"] = vtable.get_variable(comp_group.get_any_dst_tensor()); - lut["meta_kernel_id"] = id(); - lut["DATA_TYPE"] = get_cl_type_from_data_type(_dst->data_type()); - - return lut; -} - -CLBuildOptions ClTemplateReshape::get_build_options(const ComponentGroup &comp_group) const -{ - CLBuildOptions build_opts{}; - const auto root_window = comp_group.get_root_component()->template_writer()->get_window(); - const unsigned int n0 = root_window.x().step(); - const unsigned int m0 = root_window.y().step(); - const unsigned int partial_store_n0 = _dst->dimension(0) % n0; - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0)); - - return build_opts; -} - -std::string ClTemplateReshape::get_config_id() const -{ - std::string config_id{}; - config_id += 
lower_string(string_from_data_type(_dst->data_type())); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(0)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(1)); - - return config_id; -} - -std::set<std::string> ClTemplateReshape::get_headers_list() const -{ - return std::set<std::string>{"helpers.h", "tile_helpers.h"}; -} - -Window ClTemplateReshape::get_window() const -{ - ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized"); - const unsigned int n0 = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0)); - Window win = calculate_max_window(*_dst, Steps(n0)); - return win.collapse(win, Window::DimZ); -} - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h deleted file mode 100644 index 838a21db6d..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h +++ /dev/null @@ -1,107 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATERESHAPE -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATERESHAPE - -#include "arm_compute/core/experimental/Types.h" - -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClTemplateReshape final : public IGpuTemplateComponentWriter -{ -public: - /** Constructor - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - */ - ClTemplateReshape(ComponentId id, const ArgumentPack<ITensorInfo> &tensors); - /** Prevent instances of this class from being copy constructed */ - ClTemplateReshape(const ClTemplateReshape &reshape) = delete; - /** Prevent instances of this class from being copied */ - ClTemplateReshape &operator=(const ClTemplateReshape &reshape) = delete; - /** Allow instances of this class to be move constructed */ - ClTemplateReshape(ClTemplateReshape &&reshape) = default; - /** Allow instances of this class to be moved */ - ClTemplateReshape &operator=(ClTemplateReshape &&reshape) = default; - - /** Generate kernel component name */ - std::string get_name() const override; - - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ 
- std::string get_component_code(const ComponentGroup &comp_group) const override; - - /** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - - /** Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - - /** Generate the build options used in the component - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return CLBuildOptions Build options - */ - CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override; - - /** Generate the component config id string used for tuning */ - std::string get_config_id() const override; - - /** Generate the header list used in the component */ - std::set<std::string> get_headers_list() const override; - - /** Generate the execution window for the component */ - Window get_window() const override; - -private: - const ITensorInfo *_src; - const ITensorInfo *_dst; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATERESHAPE */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp deleted file mode 100644 index 846c712ceb..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp +++ /dev/null @@ -1,279 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ - -#include "ClTemplateResize.h" - -#include "arm_compute/core/Utils.h" -#include "arm_compute/core/utils/helpers/AdjustVecSize.h" -#include "arm_compute/core/utils/StringUtils.h" - -#include "src/core/helpers/WindowHelpers.h" -#include "src/core/utils/ScaleUtils.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -ClTemplateResize::ClTemplateResize(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const ClTemplateResize::Attributes &attributes) - : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}, _attributes{attributes} -{ - _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); - _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); - - ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst); -} - -std::string ClTemplateResize::get_name() const -{ - return _attributes.interpolation_policy() == InterpolationPolicy::BILINEAR ? "resize_bilinear" : "resize_nearest"; -} - -std::string ClTemplateResize::get_component_code(const IGpuTemplateComponentWriter::ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - std::string code = R"_( -//------------------ START KERNEL {{meta_kernel_id}} --------------------- -TILE(uint, 1, 1, g_dst_indirect_y); -{ - const int yo = g_ind_2 % {{arg_dst}}_h; - const int bout = g_ind_2 / {{arg_dst}}_h; -)_"; - - if (_attributes.interpolation_policy() == InterpolationPolicy::NEAREST_NEIGHBOR) - { - if (_attributes.sampling_policy() == SamplingPolicy::TOP_LEFT) - { - code += R"_( - float xi_f = (g_ind_1 * {{SCALE_X}}); - float yi_f = (yo * {{SCALE_Y}}); -)_"; - } - else - { - code += R"_( - float xi_f = ((g_ind_1 + 0.5f) * {{SCALE_X}}); - float yi_f = ((yo + 0.5f) * {{SCALE_Y}}); -)_"; - } - - if (_attributes.align_corners()) - { - code += R"_( - xi_f = round(xi_f); - yi_f = round(yi_f); -)_"; - } - - code += R"_( - const int xi0 = clamp((int)xi_f, 0, (int){{src}}_w - 1); - const int yi0 = 
clamp((int)yi_f, 0, (int){{src}}_h - 1); - - T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi0, xi0, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, {{dst}}); -)_"; - } - else if (_attributes.interpolation_policy() == InterpolationPolicy::BILINEAR) - { - if (_attributes.sampling_policy() == SamplingPolicy::TOP_LEFT) - { - code += R"_( - float xi_f = (g_ind_1 * {{SCALE_X}}); - float yi_f = (yo * {{SCALE_Y}}); -)_"; - } - else - { - code += R"_( - float xi_f = ((g_ind_1 + 0.5f) * {{SCALE_X}} - 0.5f); - float yi_f = ((yo + 0.5f) * {{SCALE_Y}} - 0.5f); -)_"; - } - - code += R"_( - const int xi = (int)floor(xi_f); - const int yi = (int)floor(yi_f); - - TILE({{SRC_DATA_TYPE}}, 1, N0, in00); - TILE({{SRC_DATA_TYPE}}, 1, N0, in01); - TILE({{SRC_DATA_TYPE}}, 1, N0, in10); - TILE({{SRC_DATA_TYPE}}, 1, N0, in11); - - in00[0].v = {{CONSTANT_VALUE}}; - in01[0].v = {{CONSTANT_VALUE}}; - in10[0].v = {{CONSTANT_VALUE}}; - in11[0].v = {{CONSTANT_VALUE}}; - - const int xi0 = clamp(xi, 0, (int){{src}}_w - 1); - const int yi0 = clamp(yi, 0, (int){{src}}_h - 1); - const int xi1 = clamp(xi + 1, 0, (int){{src}}_w - 1); - const int yi1 = clamp(yi + 1, 0, (int){{src}}_h - 1); - - T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi0, xi0, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, in00); - T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi0, xi1, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, in01); - T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi1, xi0, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, in10); - T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi1, xi1, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, in11); -)_"; - - if (is_data_type_float(_src->data_type())) - { - code += R"_( - const {{SRC_DATA_TYPE}} a = ({{SRC_DATA_TYPE}})(xi_f - (float)xi); - const {{SRC_DATA_TYPE}} b = 
({{SRC_DATA_TYPE}})(1.f - a); - const {{SRC_DATA_TYPE}} a1 = ({{SRC_DATA_TYPE}})(yi_f - (float)yi); - const {{SRC_DATA_TYPE}} b1 = ({{SRC_DATA_TYPE}})(1.f - a1); - - // Calculate the output - {{dst}}[0].v = ((in00[0].v * b * b1) + (in01[0].v * a * b1) + (in10[0].v * b * a1) + (in11[0].v * a * a1)); -)_"; - } - else - { - code += R"_( - const float a = (xi_f - (float)xi); - const float b = (1.f - a); - const float a1 = (yi_f - (float)yi); - const float b1 = (1.f - a1); - - {{dst}}[0].v = CONVERT_SAT( - (CONVERT(in00[0].v, VEC_DATA_TYPE(float, N0)) * b * b1) + - (CONVERT(in01[0].v, VEC_DATA_TYPE(float, N0)) * a * b1) + - (CONVERT(in10[0].v, VEC_DATA_TYPE(float, N0)) * b * a1) + - (CONVERT(in11[0].v, VEC_DATA_TYPE(float, N0)) * a * a1), VEC_DATA_TYPE({{DST_DATA_TYPE}}, N0)); -)_"; - } - } - else - { - ARM_COMPUTE_ERROR("Unsupported interpolation policy"); - } - - code += R"_( - g_dst_indirect_y[0].v = g_ind_1 + (yo * (int)({{arg_dst}}_w)) + bout * (int)({{arg_dst}}_w * {{arg_dst}}_h); -} -//------------------ END KERNEL {{meta_kernel_id}} --------------------- -)_"; - - return code; -} - -void ClTemplateResize::declare_variables(GpuKernelVariableTable &vtable, - const IGpuTemplateComponentWriter::ComponentGroup &comp_group) const -{ - vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "src"); - - vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "dst"); -} - -TagLUT ClTemplateResize::get_tag_lut(const GpuKernelVariableTable &vtable, - const IGpuTemplateComponentWriter::ComponentGroup &comp_group) const -{ - TagLUT lut{}; - - // Arguments and global shared variables - lut["src"] = vtable.get_variable(_src); - lut["dst"] = vtable.get_variable(_dst); - - const auto dst_argument = vtable.get_variable(comp_group.get_any_dst_tensor()); - lut["arg_dst"] = dst_argument.uniq_name; - - // Local build options - lut["meta_kernel_id"] = id(); - 
lut["SRC_DATA_TYPE"] = get_cl_type_from_data_type(_src->data_type()); - lut["SRC_TENSOR_TYPE"] = "BUFFER"; - lut["DST_DATA_TYPE"] = get_cl_type_from_data_type(_dst->data_type()); - lut["CONSTANT_VALUE"] = string_from_pixel_value(0, _src->data_type()); - - const float scale_x = - scale_utils::calculate_resize_ratio(_src->dimension(1), _dst->dimension(1), _attributes.align_corners()); - const float scale_y = - scale_utils::calculate_resize_ratio(_src->dimension(2), _dst->dimension(2), _attributes.align_corners()); - - lut["SCALE_X"] = float_to_string_with_full_precision(scale_x); - lut["SCALE_Y"] = float_to_string_with_full_precision(scale_y); - - return lut; -} - -CLBuildOptions ClTemplateResize::get_build_options(const IGpuTemplateComponentWriter::ComponentGroup &comp_group) const -{ - const Window root_window = comp_group.get_root_component()->template_writer()->get_window(); - const unsigned int n0 = root_window.x().step(); - const unsigned int m0 = root_window.y().step(); - const unsigned int partial_n0 = _dst->dimension(0) % n0; - - CLBuildOptions build_opts; - - build_opts.add_option("-DN0=" + support::cpp11::to_string(n0)); - build_opts.add_option("-DM0=" + support::cpp11::to_string(m0)); - build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_n0)); - - return build_opts; -} - -std::string ClTemplateResize::get_config_id() const -{ - std::string config_id{}; - - config_id += "resize_"; - config_id += - (_attributes.interpolation_policy() == InterpolationPolicy::NEAREST_NEIGHBOR ? "NEAREST_NEIGHBOR" : ""); - config_id += (_attributes.interpolation_policy() == InterpolationPolicy::BILINEAR ? "BILINEAR" : ""); - config_id += "_"; - config_id += (_attributes.sampling_policy() == SamplingPolicy::CENTER ? 
"center" : "topleft"); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(0)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(1)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(2)); - config_id += "_"; - config_id += support::cpp11::to_string(_dst->dimension(3)); - - return config_id; -} - -std::set<std::string> ClTemplateResize::get_headers_list() const -{ - return std::set<std::string>{"helpers.h", "tile_helpers.h"}; -} - -Window ClTemplateResize::get_window() const -{ - ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized"); - - const unsigned int n0 = adjust_vec_size(16 / _src->element_size(), _src->dimension(0)); - Window win = calculate_max_window(*_dst, Steps(n0)); - return win.collapse(win, Window::DimZ); -} - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.h deleted file mode 100644 index 4c69007185..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.h +++ /dev/null @@ -1,120 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. 
- * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATERESIZE -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATERESIZE - -#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClTemplateResize final : public IGpuTemplateComponentWriter -{ -public: - using Attributes = ClComponentResize::Attributes; - - /** Constructor - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - * @param[in] attributes Component attributes - */ - ClTemplateResize(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes); - - /** Destructor */ - ~ClTemplateResize() override = default; - - /** Prevent instances of this class from being copy constructed */ - ClTemplateResize(const ClTemplateResize &resize) = delete; - - /** Prevent instances of this class from being copied */ - ClTemplateResize &operator=(const ClTemplateResize &resize) = delete; - - /** Allow instances of this class to be move constructed */ - ClTemplateResize(ClTemplateResize &&resize) = default; - - /** Allow instances of this class to be moved */ - ClTemplateResize &operator=(ClTemplateResize &&resize) = default; - - /** Generate kernel component name */ 
- std::string get_name() const override; - - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ - std::string get_component_code(const ComponentGroup &comp_group) const override; - - /** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - - /** Generate the tag look-up table used to instantiate the component code. - * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - - /** Generate the build options used in the component - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return CLBuildOptions Build options - */ - CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override; - - /** Generate the component config id string used for tuning */ - std::string get_config_id() const override; - - /** Generate the header list used in the component */ - std::set<std::string> get_headers_list() const override; - - /** Generate the execution window for the component */ - Window get_window() const override; - -private: - const ITensorInfo *_src; - const ITensorInfo *_dst; - Attributes _attributes; -}; - -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute - -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATERESIZE */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp deleted file mode 
100644 index d0ec91e0a9..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp +++ /dev/null @@ -1,89 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#include "ClTemplateStore.h" - -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -ClTemplateStore::ClTemplateStore(ComponentId id, const ArgumentPack<ITensorInfo> &tensors) - : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{} -{ - _src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0); - _dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0); -} - -std::string ClTemplateStore::get_name() const -{ - return "store"; -} - -std::string ClTemplateStore::get_component_code(const ComponentGroup &comp_group) const -{ - ARM_COMPUTE_UNUSED(comp_group); - - return R"_( -//------------------ START KERNEL {{meta_kernel_id}} STORE --------------------- -{ - bool x_cond = PARTIAL_N0 != 0 && get_global_id(0) == 0; - - T_STORE_INDIRECT_WIDTH_SELECT({{DST_DATA_TYPE}}, M0, N0, PARTIAL_N0, {{DST_TENSOR_TYPE}}, {{dst}}, g_ind_0, {{dst}}_stride_y, x_cond, {{src}}, g_dst_indirect_y); -//------------------ END KERNEL {{meta_kernel_id}} STORE --------------------- -} - -)_"; -} - -void ClTemplateStore::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "src"); - vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), - "dst"); -} - -TagLUT ClTemplateStore::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const -{ - TagLUT lut{}; - - // Arguments and global shared variables - lut["src"] = vtable.get_variable(_src); - lut["dst"] = vtable.get_variable(_dst); - - // Local build options - lut["meta_kernel_id"] = id(); - lut["DST_TENSOR_TYPE"] = "BUFFER"; - lut["DST_DATA_TYPE"] = _dst->data_type(); - - ARM_COMPUTE_UNUSED(comp_group); - return lut; -} - -} // namespace dynamic_fusion -} // namespace experimental -} // 
namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h deleted file mode 100644 index b8c82ceadd..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h +++ /dev/null @@ -1,86 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. 
- */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATESTORE -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATESTORE - -#include "arm_compute/core/experimental/Types.h" - -#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -class ClTemplateStore final : public IGpuTemplateComponentWriter -{ -public: - /** Constructor - * - * @param[in] id Component id - * @param[in] tensors Tensor arguments to the components - */ - ClTemplateStore(ComponentId id, const ArgumentPack<ITensorInfo> &tensors); - /** Prevent instances of this class from being copy constructed */ - ClTemplateStore(const ClTemplateStore &store) = delete; - /** Prevent instances of this class from being copied */ - ClTemplateStore &operator=(const ClTemplateStore &store) = delete; - /** Allow instances of this class to be move constructed */ - ClTemplateStore(ClTemplateStore &&store) = default; - /** Allow instances of this class to be moved */ - ClTemplateStore &operator=(ClTemplateStore &&store) = default; - /** Generate kernel component name */ - std::string get_name() const override; - /** Generate kernel component code template - * - * @param[in] comp_group Component group of which the component is a part of - * - * @return std::string Component code - */ - std::string get_component_code(const ComponentGroup &comp_group) const override; - /** Declare all variables used by the component in the @p vtable - * - * @param[out] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - */ - void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - /** Generate the tag look-up table used to instantiate the component code. 
- * - * @param[in] vtable Variable table - * @param[in] comp_group Component group of which the component is a part of - * - * @return TagLUT Tag lookup table - */ - TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override; - -private: - const ITensorInfo *_src; - const ITensorInfo *_dst; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATESTORE */ diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp deleted file mode 100644 index d3d7c8db83..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp +++ /dev/null @@ -1,325 +0,0 @@ -/* - * Copyright (c) 2022-2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#include "ClTemplateWriter.h" - -#include "arm_compute/core/CL/CLKernelLibrary.h" - -#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h" - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -/// @note: some tags can be unused since they could be used only for the macros, or only for the component code -std::string ClTemplateWriter::replace_tags(const std::string &code_template, const TagLUT &tags) -{ - std::string replaced_code = ""; - bool scanning_pattern = false; - std::string pattern_found = ""; - for (size_t i = 0; i < code_template.size() - 1; ++i) - { - if (!scanning_pattern) - { - if (code_template[i] == '{' && code_template[i + 1] == '{') - { - i += 1; - scanning_pattern = true; - pattern_found = ""; - } - else - { - replaced_code += code_template[i]; - } - } - else - { - if (code_template[i] == '}' && code_template[i + 1] == '}') - { - i += 1; - scanning_pattern = false; - std::string err = "Pattern " + pattern_found + " not found in tags"; - ARM_COMPUTE_ERROR_ON_MSG(tags.find(pattern_found) == tags.end(), err.c_str()); - replaced_code += tags.find(pattern_found)->second.value; - } - else - { - pattern_found += code_template[i]; - } - } - } - - return replaced_code; -} -ClTemplateWriter::~ClTemplateWriter() -{ -} -ClTemplateWriter::ClTemplateWriter(const GpuKernelComponentGroup &components) : _components{components} -{ -} -std::string ClTemplateWriter::get_name() -{ - return write_kernel_name(); -} -std::string ClTemplateWriter::get_code() -{ - return write_code(); -} -std::string ClTemplateWriter::get_config_id() -{ - std::string config_id 
= get_name(); - for (const auto &comp : _components) - { - config_id += "--" + comp->template_writer()->get_config_id() + "--"; - } - - return config_id; -} - -CLBuildOptions ClTemplateWriter::get_build_options() -{ - CLBuildOptions build_opts{}; - - for (const auto &comp : _components) - { - build_opts.add_options(comp->template_writer()->get_build_options(_components).options()); - } - - return build_opts; -} - -Window ClTemplateWriter::get_window() const -{ - const auto root_comp = _components.get_root_component(); - ARM_COMPUTE_ERROR_ON_MSG(root_comp == nullptr, "No root component found"); - return root_comp->template_writer()->get_window(); -} - -std::map<ITensorInfo::Id, GpuKernelArgument> ClTemplateWriter::get_tensors() -{ - // Assemble GpuKernelArguments - std::map<ITensorInfo::Id, GpuKernelArgument> tensors; - for (const auto t : _components.get_argument_tensors()) - { - tensors.emplace(t->id(), GpuKernelArgument{*t, _vtable.get_variable(t).kernel_argument_info}); - } - return tensors; -} - -std::string ClTemplateWriter::write_code() -{ - ARM_COMPUTE_ERROR_ON_MSG(_components.empty(), "No components found"); - - // These data structures will hold the data from all the components in the blueprint - std::set<std::string> headers_list{}; - std::set<std::string> additional_macros{}; - std::vector<std::string> component_codes{}; // vector because order matters - - // Pass 1: Declare all kernel variables - for (auto &component : _components) - { - component->template_writer()->declare_variables(_vtable, _components); - } - // Pass 2: Generate component codes - for (auto &component : _components) - { - const auto component_writer = component->template_writer(); - auto curr_headers_list = component_writer->get_headers_list(); - auto curr_additional_macros = component_writer->get_additional_macros(); - auto curr_component_code = component_writer->get_component_code(_components); - const auto var_lut = component_writer->get_tag_lut( - _vtable, - _components); // 
Ideally can be merged with get_component_code once we have finer-grained code generation technique - component_codes.push_back(replace_tags(curr_component_code, var_lut)); - - headers_list.insert(curr_headers_list.begin(), curr_headers_list.end()); - if (!additional_macros.empty()) // Some components might not have any - { - additional_macros.insert(replace_tags(curr_additional_macros, var_lut)); - } - } - - // Step 3: Assemble the data gathered by traversing the graph into the string "code" - std::string code = ""; - - for (auto &header : headers_list) - { -#if defined(EMBEDDED_KERNELS) - code += CLKernelLibrary::get().get_program(header).first; -#else // defined(EMBEDDED_KERNELS) - code += "#include \"" + header + "\"\n"; -#endif // defined(EMBEDDED_KERNELS) - } - - for (auto &macros : additional_macros) - { - code += macros; - } - - auto arguments = _components.get_argument_tensors(); - std::sort(arguments.begin(), arguments.end(), - [](const ITensorInfo *l, const ITensorInfo *r) { return l->id() < r->id(); }); - code += write_kernel_signature(_vtable.get_variable_list(arguments)); - - code += "\n{\n\n"; - - code += " //------------------ START KERNEL_BUILDER_COORDINATE ---------------------\n\n"; - code += write_global_section(); - code += " //------------------ END KERNEL_BUILDER_COORDINATE ---------------------\n"; - - { - const auto tiles = _components.get_tiles(); - std::stringstream tiles_ss; - - tiles_ss << " //------------------ START TILE DECLARATION ---------------------\n"; - - for (auto tile : tiles) - { - const auto var = _vtable.get_variable(tile); - const auto data_type = get_cl_type_from_data_type(tile->data_type()); - const auto var_name = var.uniq_name; - - tiles_ss << " TILE(" << data_type << ", M0, N0, " << var_name << ");\n"; - } - - tiles_ss << " //------------------ END TILE DECLARATION ---------------------\n"; - - code += tiles_ss.str(); - } - - for (const auto &component_code : component_codes) - { - code += component_code; - code += "\n"; 
- } - - code += "}\n"; - - return code; -} -std::string ClTemplateWriter::write_global_section() const -{ - const auto dst_info = _components.get_any_dst_tensor(); - const auto dst_w = dst_info->dimension(0); - const auto tile_w = std::max(1, get_window().x().step()); - const auto tile_h = std::max(1, get_window().y().step()); - auto leftover_w = dst_w % tile_w; - - std::string code = ""; - code += std::string(" int g_ind_0 = GET_SPATIAL_IDX(0, ") + std::to_string(tile_w) + ", " + - std::to_string(leftover_w) + ");\n"; - code += std::string(" int g_ind_1 = GET_SPATIAL_IDX(1, ") + std::to_string(tile_h) + ", " + "0);\n"; - code += std::string(" int g_ind_2 = GET_SPATIAL_IDX(2, 1, 0);\n\n"); - - code += " const bool g_cond_x = (g_ind_0 == 0);\n"; - code += " const bool g_cond_y = (g_ind_1 == 0);\n"; - - return code; -} -std::string ClTemplateWriter::write_argument_declaration(const GpuKernelVariableTable::TensorVariable &var) const -{ - std::string code; - switch (var.kernel_argument_info.type) - { - case GpuKernelArgumentInfo::Type::Vector: - { - code += "\n VECTOR_DECLARATION(" + var.uniq_name + ")"; - break; - } - case GpuKernelArgumentInfo::Type::Image: - { - code += "\n IMAGE_DECLARATION(" + var.uniq_name + ")"; - break; - } - case GpuKernelArgumentInfo::Type::Image_3D: - { - code += "\n IMAGE_DECLARATION(" + var.uniq_name + "),"; - code += "\n unsigned int " + var.uniq_name + "_stride_z"; - break; - } - case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D: - { - code += "\n __read_only image2d_t " + var.uniq_name + "_img,"; - code += "\n unsigned int " + var.uniq_name + "_stride_z"; - break; - } - case GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer: - { - code += "\n TENSOR4D_T(" + var.uniq_name + ", BUFFER)"; - break; - } - case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image: - { - code += "\n TENSOR4D_T(" + var.uniq_name + ", IMAGE)"; - break; - } - case GpuKernelArgumentInfo::Type::Tensor_3D: - { - code += "\n TENSOR3D_DECLARATION(" + 
var.uniq_name + ")"; - break; - } - default: - { - ARM_COMPUTE_ERROR("Unsupported declaration generation for GpuKernelArgumentInfo::Type"); - } - } - return code; -} -std::string ClTemplateWriter::write_kernel_signature(const GpuKernelVariableTable::VariableList &argument_list) const -{ - std::string code = "\n__kernel void " + write_kernel_name() + "("; - - for (int i = 0; i < static_cast<int>(argument_list.size()) - 1; ++i) - { - code += write_argument_declaration(argument_list[i]) + ","; - } - if (static_cast<int>(argument_list.size()) - 1 >= 0) - { - code += write_argument_declaration(argument_list[argument_list.size() - 1]); - } - - code += ')'; - - return code; -} -std::string ClTemplateWriter::write_kernel_name() const -{ - if (_components.empty()) - { - return "empty_kernel"; - } - std::string name = _components.empty() ? "" : _components[0]->template_writer()->get_name(); - for (size_t i = 1; i < _components.size(); ++i) - { - name += "___"; - name += _components[i]->template_writer()->get_name(); - } - - return name; -} -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h deleted file mode 100644 index 83f617b6c6..0000000000 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h +++ /dev/null @@ -1,92 +0,0 @@ -/* - * Copyright (c) 2022 Arm Limited. 
- * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEWRITER -#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEWRITER - -#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" -#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" -#include "src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h" -#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h" - -#include <map> - -namespace arm_compute -{ -namespace experimental -{ -namespace dynamic_fusion -{ -/** Use a templated-string-based method to write kernel code - * It stitches the component code templates together based on the valid fusion configuration. - * It then instantiates the actual kernel code from the template and the generated tag lookup table. 
- */ -class ClTemplateWriter : public IGpuKernelWriter -{ -public: - /** Instantiates a kernel code string from the kernel code template - * @note: some tags can be unused since they could be used only for the macros, or only for the component code - * - * @param[in] code_template Kernel code template - * @param[in] tags Tag lookup table - * - * @return std::string Instantiated kernel string - */ - static std::string replace_tags(const std::string &code_template, const TagLUT &tags); - /** Default constructor */ - ClTemplateWriter() = default; - /** Constructor - * - * @param[in] components Kernel component group from which the kernel will be generated - */ - ClTemplateWriter(const GpuKernelComponentGroup &components); - /** Destructor */ - ~ClTemplateWriter() override; - /** Generate kernel name */ - std::string get_name() override; - /** Generate kernel code */ - std::string get_code() override; - /** Generate build options */ - CLBuildOptions get_build_options() override; - /** Generate config id string of the entire kernel. 
This is used for tuning */ - std::string get_config_id() override; - /** Generate execution window */ - Window get_window() const override; - /** Get the kernel argument lists of the kernel*/ - std::map<ITensorInfo::Id, GpuKernelArgument> get_tensors() override; - -private: - std::string write_kernel_name() const; - std::string write_code(); - std::string write_global_section() const; - std::string write_argument_declaration(const GpuKernelVariableTable::TensorVariable &var) const; - std::string write_kernel_signature(const GpuKernelVariableTable::VariableList &argument_list) const; - -private: - GpuKernelComponentGroup _components{}; - GpuKernelVariableTable _vtable{}; -}; -} // namespace dynamic_fusion -} // namespace experimental -} // namespace arm_compute -#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEWRITER */ diff --git a/tests/validation/dynamic_fusion/gpu/Integration.cpp b/tests/validation/dynamic_fusion/gpu/Integration.cpp index 80dcaa8f90..453983c077 100644 --- a/tests/validation/dynamic_fusion/gpu/Integration.cpp +++ b/tests/validation/dynamic_fusion/gpu/Integration.cpp @@ -63,7 +63,7 @@ namespace validation TEST_SUITE(CL) TEST_SUITE(INTEGRATION) TEST_SUITE(DYNAMIC_FUSION) -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF // Conv2d is not ported to ckw yet. COMPMID-6259 + TEST_CASE(Conv2d, framework::DatasetMode::ALL) { /* Computation: @@ -156,7 +156,7 @@ TEST_CASE(Conv2d, framework::DatasetMode::ALL) 0.001f); /**< Tolerance value for comparing reference's output against implementation's output for floating point data types */ validate(CLAccessor(t_dst), ref_t_dst_nchw, tolerance_f32); } -#endif // ACL_INTERNAL_TEST_CKW_IN_DF + TEST_CASE(Add_Output_Add_Output, framework::DatasetMode::ALL) { /* Computation: @@ -368,8 +368,9 @@ TEST_CASE(Add_Output_Add_Cast_Cast_Output, framework::DatasetMode::ALL) validate(CLAccessor(t_out_1), ref_t_out_1, tolerance_cast_f32); } -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF // Conv2d is not ported to ckw yet. 
COMPMID-6259 -TEST_CASE(Conv2d_Sigmoid_DepthwiseConv2d_Mul, framework::DatasetMode::ALL) +/// TODO: COMPMID-6593 : This integration test fails with CKW backend. +/// It was not enabled for CKW before, therefore went unnoticed. +TEST_CASE(Conv2d_Sigmoid_DepthwiseConv2d_Mul, framework::DatasetMode::DISABLED) { // (tensor0) // | @@ -580,7 +581,6 @@ TEST_CASE(Conv2d_Sigmoid_DepthwiseConv2d_Mul, framework::DatasetMode::ALL) constexpr RelativeTolerance<float> tolerance(0.001f); validate(CLAccessor(tensor6), ref_mul_dst_nchw, tolerance); } -#endif // ACL_INTERNAL_TEST_CKW_IN_DF TEST_SUITE(Invalid_Fusion_Should_Fail) TEST_CASE(Multiple_Complex_Ops_0, framework::DatasetMode::ALL) diff --git a/tests/validation/dynamic_fusion/gpu/cl/DepthwiseConv2d.cpp b/tests/validation/dynamic_fusion/gpu/cl/DepthwiseConv2d.cpp index 40e1ea8929..2f8c639cea 100644 --- a/tests/validation/dynamic_fusion/gpu/cl/DepthwiseConv2d.cpp +++ b/tests/validation/dynamic_fusion/gpu/cl/DepthwiseConv2d.cpp @@ -290,7 +290,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, { validate(CLAccessor(_target), _reference, tolerance_f16); } -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF // Do not include this test as dilation not supported yet in DepthwiseConv2d CKW kernel + TEST_SUITE(Dilation) FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionGpuDepthwiseConv2dFixture<half>, @@ -313,7 +313,6 @@ FIXTURE_DATA_TEST_CASE(RunLarge, validate(CLAccessor(_target), _reference, tolerance_f16); } TEST_SUITE_END() // Dilation -#endif // ACL_INTERNAL_TEST_CKW_IN_DF TEST_SUITE_END() // W3x3 TEST_SUITE(Generic) @@ -336,7 +335,7 @@ FIXTURE_DATA_TEST_CASE(RunLarge, { validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); } -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF // Do not include this test as dilation not supported yet in DepthwiseConv2d CKW kernel + TEST_SUITE(Dilation) FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionGpuDepthwiseConv2dFixture<half>, @@ -359,7 +358,6 @@ FIXTURE_DATA_TEST_CASE(RunLarge, validate(CLAccessor(_target), _reference, 
tolerance_f16, tolerance_num); } TEST_SUITE_END() // Dilation -#endif // ACL_INTERNAL_TEST_CKW_IN_DF TEST_SUITE_END() // Generic TEST_SUITE_END() // FP16 @@ -385,7 +383,6 @@ FIXTURE_DATA_TEST_CASE(RunLarge, validate(CLAccessor(_target), _reference, tolerance_f32); } -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF // Do not include this test as dilation not supported yet in DepthwiseConv2d CKW kernel TEST_SUITE(Dilation) FIXTURE_DATA_TEST_CASE(RunSmall, @@ -409,7 +406,6 @@ FIXTURE_DATA_TEST_CASE(RunLarge, validate(CLAccessor(_target), _reference, tolerance_f32); } TEST_SUITE_END() // Dilation -#endif // ACL_INTERNAL_TEST_CKW_IN_DF TEST_SUITE_END() // W3x3 TEST_SUITE(Generic) @@ -445,7 +441,6 @@ FIXTURE_DATA_TEST_CASE(RunLargeKernelSize, validate(CLAccessor(_target), _reference, tolerance_f32); } -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF // Do not include this test as dilation not supported yet in DepthwiseConv2d CKW kernel TEST_SUITE(Dilation) FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionGpuDepthwiseConv2dFixture<float>, @@ -468,7 +463,6 @@ FIXTURE_DATA_TEST_CASE(RunLarge, validate(CLAccessor(_target), _reference, tolerance_f32); } TEST_SUITE_END() // Dilation -#endif // ACL_INTERNAL_TEST_CKW_IN_DF TEST_SUITE_END() // Generic TEST_SUITE_END() // FP32 TEST_SUITE_END() // Float diff --git a/tests/validation/dynamic_fusion/gpu/cl/MatMul.cpp b/tests/validation/dynamic_fusion/gpu/cl/MatMul.cpp index 96b79679c3..82d66ca6ce 100644 --- a/tests/validation/dynamic_fusion/gpu/cl/MatMul.cpp +++ b/tests/validation/dynamic_fusion/gpu/cl/MatMul.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifdef ACL_INTERNAL_TEST_CKW_IN_DF + #include "tests/AssetsLibrary.h" #include "tests/CL/CLAccessor.h" #include "tests/datasets/LargeMatMulDataset.h" @@ -333,4 +333,3 @@ TEST_SUITE_END() // CL } // namespace validation } // namespace test } // namespace arm_compute -#endif // ACL_INTERNAL_TEST_CKW_IN_DF diff --git a/tests/validation/dynamic_fusion/gpu/cl/Pool2d.cpp b/tests/validation/dynamic_fusion/gpu/cl/Pool2d.cpp index e537826c71..be816b32b3 100644 --- a/tests/validation/dynamic_fusion/gpu/cl/Pool2d.cpp +++ b/tests/validation/dynamic_fusion/gpu/cl/Pool2d.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifdef ACL_INTERNAL_TEST_CKW_IN_DF + #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h" #include "tests/CL/CLAccessor.h" @@ -217,4 +217,3 @@ TEST_SUITE_END() // CL } // namespace validation } // namespace test } // namespace arm_compute -#endif // ACL_INTERNAL_TEST_CKW_IN_DF diff --git a/tests/validation/dynamic_fusion/gpu/cl/Reshape.cpp b/tests/validation/dynamic_fusion/gpu/cl/Reshape.cpp index 43617fe1be..a1495cf014 100644 --- a/tests/validation/dynamic_fusion/gpu/cl/Reshape.cpp +++ b/tests/validation/dynamic_fusion/gpu/cl/Reshape.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF // Do not include this test if ACL_INTERNAL_TEST_CKW_IN_DF and the op has not been ported to ckw + #include "tests/CL/CLAccessor.h" #include "tests/datasets/ReshapeLayerDataset.h" #include "tests/framework/datasets/Datasets.h" @@ -82,7 +82,7 @@ using DynamicFusionGpuReshapeLayerFixture = TEST_SUITE(F32) FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionGpuReshapeLayerFixture<float>, - framework::DatasetMode::ALL, + framework::DatasetMode::DISABLED, combine(datasets::SmallReshapeLayerDataset(), framework::dataset::make("DataType", DataType::F32))) { @@ -94,7 +94,7 @@ TEST_SUITE_END() // F32 TEST_SUITE(F16) FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionGpuReshapeLayerFixture<half>, - framework::DatasetMode::ALL, + framework::DatasetMode::DISABLED, combine(datasets::SmallReshapeLayerDataset(), framework::dataset::make("DataType", DataType::F16))) { @@ -106,7 +106,7 @@ TEST_SUITE_END() // F16 TEST_SUITE(U8) FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionGpuReshapeLayerFixture<uint8_t>, - framework::DatasetMode::ALL, + framework::DatasetMode::DISABLED, combine(datasets::SmallReshapeLayerDataset(), framework::dataset::make("DataType", DataType::U8))) { @@ -118,7 +118,7 @@ TEST_SUITE_END() // U8 TEST_SUITE(S8) FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionGpuReshapeLayerFixture<int8_t>, - framework::DatasetMode::ALL, + framework::DatasetMode::DISABLED, combine(datasets::SmallReshapeLayerDataset(), framework::dataset::make("DataType", DataType::S8))) { @@ -130,7 +130,7 @@ TEST_SUITE_END() // S8 TEST_SUITE(S16) FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionGpuReshapeLayerFixture<int16_t>, - framework::DatasetMode::ALL, + framework::DatasetMode::DISABLED, combine(datasets::SmallReshapeLayerDataset(), framework::dataset::make("DataType", DataType::S16))) { @@ -145,5 +145,3 @@ TEST_SUITE_END() // CL } // namespace validation } // namespace test } // namespace arm_compute - -#endif // ACL_INTERNAL_TEST_CKW_IN_DF diff --git 
a/tests/validation/dynamic_fusion/gpu/cl/Softmax.cpp b/tests/validation/dynamic_fusion/gpu/cl/Softmax.cpp index b7cb6bace6..8f5a1ed14a 100644 --- a/tests/validation/dynamic_fusion/gpu/cl/Softmax.cpp +++ b/tests/validation/dynamic_fusion/gpu/cl/Softmax.cpp @@ -21,7 +21,7 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ACL_INTERNAL_TEST_CKW_IN_DF // Do not include this test if ACL_INTERNAL_TEST_CKW_IN_DF and the op has not been ported to ckw + #include "arm_compute/core/Types.h" #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.h" @@ -46,62 +46,70 @@ namespace validation RelativeTolerance<half> tolerance_f16(half(0.2)); RelativeTolerance<float> tolerance_f32(0.001f); +using framework::dataset::make; + +/// TODO: COMPMID-6713 +/// Softmax is not implemented in CKW. Therefore, the tests are DISABLED. +/// Enable the tests when Softmax is implemented in CKW. + TEST_SUITE(CL) TEST_SUITE(DYNAMIC_FUSION) TEST_SUITE(SOFTMAX) // *INDENT-OFF* // clang-format off -DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip( - framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), // Mismatching data types - TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), // Mismatching shapes - TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 13U), 1, DataType::S32), // Unsupported data type - TensorInfo(TensorShape(32U, 13U), 1, DataType::F16), - TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), - - }), - framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U), 1, DataType::F16), - TensorInfo(TensorShape(27U, 11U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), - 
TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM16), // Unsupported data type - TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), - TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), - - })), - framework::dataset::make("beta", { 1.0, - 2.0, - 2.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - 1.0, - })), - framework::dataset::make("axis", { - 0, - 0, - 1, // Invalid as axis != 0 - 0, - 0, - 0, - -3, // Invalid as axis != 0 - 2, // Invalid as axis != 0 - 1, // Invalid as axis != 0 - -1, // Invalid as axis != 0 - })), - framework::dataset::make("Expected", { false, false, false, true, false, false, false, false, false, false})), - input_info, output_info, beta, axis, expected) +DATA_TEST_CASE(Validate, framework::DatasetMode::DISABLED, + zip( + make("InputInfo", { + TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), // Mismatching data types + TensorInfo(TensorShape(27U, 13U), 1, DataType::F32), // Mismatching shapes + TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U), 1, DataType::S32), // Unsupported data type + TensorInfo(TensorShape(32U, 13U), 1, DataType::F16), + TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), + }), + make("OutputInfo",{ + TensorInfo(TensorShape(27U, 13U), 1, DataType::F16), + TensorInfo(TensorShape(27U, 11U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U), 1, DataType::QASYMM16), // 
Unsupported data type + TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U), 1, DataType::F32), + }), + make("beta", { + 1.0, + 2.0, + 2.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + 1.0, + }), + make("axis", { + 0, + 0, + 1, // Invalid as axis != 0 + 0, + 0, + 0, + -3, // Invalid as axis != 0 + 2, // Invalid as axis != 0 + 1, // Invalid as axis != 0 + -1, // Invalid as axis != 0 + }), + make("Expected", { false, false, false, true, false, false, false, false, false, false})), + input_info, output_info, beta, axis, expected) { // Create a new workload sketch CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); @@ -122,33 +130,39 @@ using DynamicFusionSoftmaxLayerFixture = DynamicFusionSoftmaxValidationFixture<C TEST_SUITE(FLOAT) TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionSoftmaxLayerFixture<float>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SoftmaxLayerSmallShapes(), - framework::dataset::make("DataType", DataType::F32)), - framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 0 })), - framework::dataset::make("is_log", {false, true}))) +FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionSoftmaxLayerFixture<float>, framework::DatasetMode::DISABLED, + combine( + datasets::SoftmaxLayerSmallShapes(), + make("DataType", DataType::F32), + make("Beta", { 1.0f, 2.0f }), + make("Axis", { 0 }), + make("is_log", {false, true}))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, DynamicFusionSoftmaxLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::SoftmaxLayerLargeShapes(), - framework::dataset::make("DataType", DataType::F32)), - framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 0 
})), - framework::dataset::make("is_log", {false, true}))) +FIXTURE_DATA_TEST_CASE(RunLarge, DynamicFusionSoftmaxLayerFixture<float>, framework::DatasetMode::DISABLED, + combine( + datasets::SoftmaxLayerLargeShapes(), + make("DataType", DataType::F32), + make("Beta", { 1.0f, 2.0f }), + make("Axis", { 0 }), + make("is_log", {false, true}))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(Run4D, DynamicFusionSoftmaxLayerFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::SoftmaxLayer4DShapes(), - framework::dataset::make("DataType", DataType::F32)), - framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 0 })), - framework::dataset::make("is_log", {false, true}))) +FIXTURE_DATA_TEST_CASE(Run4D, DynamicFusionSoftmaxLayerFixture<float>, framework::DatasetMode::DISABLED, + combine( + datasets::SoftmaxLayer4DShapes(), + make("DataType", DataType::F32), + make("Beta", { 1.0f, 2.0f }), + make("Axis", { 0 }), + make("is_log", {false, true}))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); @@ -156,33 +170,39 @@ FIXTURE_DATA_TEST_CASE(Run4D, DynamicFusionSoftmaxLayerFixture<float>, framework TEST_SUITE_END() // FP32 TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionSoftmaxLayerFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SoftmaxLayerSmallShapes(), - framework::dataset::make("DataType", DataType::F16)), - framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 0 })), - framework::dataset::make("is_log", {false, true}))) +FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionSoftmaxLayerFixture<half>, framework::DatasetMode::DISABLED, + combine( + datasets::SoftmaxLayerSmallShapes(), + make("DataType", DataType::F16), + make("Beta", { 1.0f, 2.0f }), + make("Axis", { 0 }), + make("is_log", {false, true}))) { // Validate output 
validate(CLAccessor(_target), _reference, tolerance_f16); } -FIXTURE_DATA_TEST_CASE(RunLarge, DynamicFusionSoftmaxLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::SoftmaxLayerLargeShapes(), - framework::dataset::make("DataType", DataType::F16)), - framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 0 })), - framework::dataset::make("is_log", {false, true}))) +FIXTURE_DATA_TEST_CASE(RunLarge, DynamicFusionSoftmaxLayerFixture<half>, framework::DatasetMode::DISABLED, + combine( + datasets::SoftmaxLayerLargeShapes(), + make("DataType", DataType::F16), + make("Beta", { 1.0f, 2.0f }), + make("Axis", { 0 }), + make("is_log", {false, true}))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f16); } -FIXTURE_DATA_TEST_CASE(Run4D, DynamicFusionSoftmaxLayerFixture<half>, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::SoftmaxLayer4DShapes(), - framework::dataset::make("DataType", DataType::F16)), - framework::dataset::make("Beta", { 1.0f, 2.0f })), - framework::dataset::make("Axis", { 0 })), - framework::dataset::make("is_log", {false, true}))) +FIXTURE_DATA_TEST_CASE(Run4D, DynamicFusionSoftmaxLayerFixture<half>, framework::DatasetMode::DISABLED, + combine( + datasets::SoftmaxLayer4DShapes(), + make("DataType", DataType::F16), + make("Beta", { 1.0f, 2.0f }), + make("Axis", { 0 }), + make("is_log", {false, true}))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f16); @@ -197,5 +217,3 @@ TEST_SUITE_END() // CL } // namespace validation } // namespace test } // namespace arm_compute - -#endif // ACL_INTERNAL_TEST_CKW_IN_DF |