diff options
Diffstat (limited to 'compute_kernel_writer/prototype/src')
-rw-r--r-- | compute_kernel_writer/prototype/src/Kernel.cpp | 163 | ||||
-rw-r--r-- | compute_kernel_writer/prototype/src/KernelArgument.cpp | 66 | ||||
-rw-r--r-- | compute_kernel_writer/prototype/src/KernelWriter.cpp | 371 | ||||
-rw-r--r-- | compute_kernel_writer/prototype/src/OperandBase.cpp | 49 | ||||
-rw-r--r-- | compute_kernel_writer/prototype/src/Prototype.h | 4189 | ||||
-rw-r--r-- | compute_kernel_writer/prototype/src/TensorInfo.cpp | 77 | ||||
-rw-r--r-- | compute_kernel_writer/prototype/src/TensorOperand.cpp | 272 | ||||
-rw-r--r-- | compute_kernel_writer/prototype/src/TensorTileSampler.cpp | 191 | ||||
-rw-r--r-- | compute_kernel_writer/prototype/src/TileInfo.cpp | 73 | ||||
-rw-r--r-- | compute_kernel_writer/prototype/src/TileOperand.cpp | 135 |
10 files changed, 0 insertions, 5586 deletions
diff --git a/compute_kernel_writer/prototype/src/Kernel.cpp b/compute_kernel_writer/prototype/src/Kernel.cpp deleted file mode 100644 index 6228ed17d0..0000000000 --- a/compute_kernel_writer/prototype/src/Kernel.cpp +++ /dev/null @@ -1,163 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "ckw/Kernel.h" - -#include "ckw/TensorOperand.h" -#include "ckw/types/GpuTargetLanguage.h" - -#include "src/Prototype.h" - -namespace ckw -{ - -Kernel::Kernel(GpuTargetLanguage language) : Kernel{"unnamed", language} -{ -} - -Kernel::Kernel(const char *name, GpuTargetLanguage language) - : _name(name), - _kernel(std::make_unique<prototype::GpuKernelWriterDataHolder>(language)), - _operands{}, - _tensor_id_operands{} -{ -} - -Kernel::~Kernel() -{ -} - -const std::string &Kernel::name() const -{ - return _name; -} - -void Kernel::name(const std::string &name) -{ - _name = name; -} -std::vector<KernelArgument> Kernel::arguments() const -{ - std::vector<KernelArgument> arguments; - - const auto impl_args = _kernel->arguments.tensor_argument_declarations(); - - for (auto tensor_arg : impl_args) - { - auto tensor = _tensor_id_operands.at(tensor_arg->format().id); - arguments.push_back(*tensor); - - for (auto component_arg : tensor_arg->component_declarations()) - { - switch (component_arg) - { - case TensorComponentType::OffsetFirstElement: - arguments.push_back(tensor->offset_first_element_in_bytes()); - break; - - case TensorComponentType::Stride1: - arguments.push_back(tensor->stride1()); - break; - - case TensorComponentType::Stride2: - arguments.push_back(tensor->stride2()); - break; - - case TensorComponentType::Stride3: - arguments.push_back(tensor->stride3()); - break; - - case TensorComponentType::Stride4: - arguments.push_back(tensor->stride4()); - break; - - case TensorComponentType::Dim0: - arguments.push_back(tensor->dim0()); - break; - - case TensorComponentType::Dim1: - arguments.push_back(tensor->dim1()); - break; - - case TensorComponentType::Dim2: - arguments.push_back(tensor->dim2()); - break; - - case TensorComponentType::Dim3: - arguments.push_back(tensor->dim3()); - break; - - case TensorComponentType::Dim4: - arguments.push_back(tensor->dim4()); - break; - - case TensorComponentType::Dim1xDim2: - arguments.push_back(tensor->dim1_dim2()); - break; - - case TensorComponentType::Dim1xDim2xDim3: - arguments.push_back(tensor->dim1_dim2_dim3()); - break; - - default: - CKW_ASSERT(false); - } - } - } - - return arguments; -} - -TileOperand &Kernel::register_operand(std::unique_ptr<TileOperand> operand) -{ - const auto &name = operand->name(); - auto ptr = operand.get(); - - CKW_ASSERT(_operands.find(name) == _operands.end()); - _operands[name] = std::move(operand); - - return *ptr; -} - -TensorOperand &Kernel::register_operand(std::unique_ptr<TensorOperand> operand) -{ - const auto id = operand->info().id(); - const auto &name = operand->name(); - auto ptr = operand.get(); - - CKW_ASSERT(_tensor_id_operands.find(id) == _tensor_id_operands.end()); - CKW_ASSERT(_operands.find(name) == _operands.end()); - - _tensor_id_operands[id] = operand.get(); - _operands[name] = std::move(operand); - - return *ptr; -} - -prototype::GpuKernelWriterDataHolder *Kernel::impl() -{ - return _kernel.get(); -} - -} // namespace ckw diff --git a/compute_kernel_writer/prototype/src/KernelArgument.cpp b/compute_kernel_writer/prototype/src/KernelArgument.cpp deleted file mode 100644 index 24ace28eb3..0000000000 --- a/compute_kernel_writer/prototype/src/KernelArgument.cpp +++ /dev/null @@ -1,66 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "ckw/KernelArgument.h" - -#include "ckw/Error.h" -#include "ckw/TensorOperand.h" - -namespace ckw -{ - -KernelArgument::KernelArgument(TensorOperand &tensor) : _type(Type::TensorStorage), _id(tensor.info().id()) -{ - _sub_id.tensor_storage_type = tensor.storage_type(); -} - -KernelArgument::KernelArgument(TensorComponentOperand &tensor_component) - : _type(Type::TensorComponent), _id(tensor_component.tensor().info().id()) -{ - _sub_id.tensor_component_type = tensor_component.component_type(); -} - -KernelArgument::Type KernelArgument::type() const -{ - return _type; -} - -int32_t KernelArgument::id() const -{ - return _id; -} - -TensorStorageType KernelArgument::tensor_storage_type() const -{ - CKW_ASSERT(_type == Type::TensorStorage); - return _sub_id.tensor_storage_type; -} - -TensorComponentType KernelArgument::tensor_component_type() const -{ - CKW_ASSERT(_type == Type::TensorComponent); - return _sub_id.tensor_component_type; -} - -} // namespace ckw diff --git a/compute_kernel_writer/prototype/src/KernelWriter.cpp b/compute_kernel_writer/prototype/src/KernelWriter.cpp deleted file mode 100644 index 9f58d9fefa..0000000000 --- a/compute_kernel_writer/prototype/src/KernelWriter.cpp +++ /dev/null @@ -1,371 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "ckw/KernelWriter.h" - -#include "ckw/Error.h" -#include "ckw/TensorInfo.h" -#include "ckw/TensorOperand.h" - -#include "src/Prototype.h" - -#include <sstream> - -namespace ckw -{ - -namespace -{ - -inline prototype::TensorInfo create_impl_tensor_info(const TensorInfo &info) -{ - return prototype::TensorInfo{info.shape(), info.data_type(), info.data_layout(), info.id()}; -} - -} // namespace - -// ================================================================================================= -// Constructors and destructor -// ================================================================================================= - -KernelWriter::KernelWriter(Kernel &kernel) - : _kernel(&kernel), - _impl_attr(std::make_unique<prototype::GpuKernelWriterAttribute>()), - _impl(prototype::GpuKernelWriterFactory::create(_impl_attr.get(), kernel.impl())) -{ - _impl->set_IdSpace(1); -} - -KernelWriter::~KernelWriter() -{ -} - -// ================================================================================================= -// Scope management -// ================================================================================================= - -int32_t KernelWriter::id_space() const -{ - return _id_space; -} - -KernelWriter &KernelWriter::id_space(int32_t id_space) -{ - CKW_ASSERT(id_space <= _max_id_space); - - _id_space = id_space; - return *this; -} - -int32_t KernelWriter::next_id_space() -{ - id_space(++_max_id_space); - return _id_space; -} - -// ================================================================================================= -// Tensor and tile declaration -// ================================================================================================= - -TensorOperand & -KernelWriter::declare_tensor_argument(const std::string &name, const TensorInfo &info, TensorStorageType storage_type) -{ - const auto var_name = generate_variable_name(name); - - _impl->declare_argument(var_name, create_impl_tensor_info(info)); - - auto &operand = _kernel->register_operand(std::make_unique<TensorOperand>(var_name, info, storage_type)); - - return operand; -} - -TileOperand &KernelWriter::declare_tile_argument(const std::string &name, int32_t value) -{ - const auto var_name = generate_variable_name(name); - - auto &operand = _kernel->register_operand(std::make_unique<TileOperand>(var_name, value)); - - return operand; -} - -std::string KernelWriter::generate_variable_name(const std::string &name) const -{ - std::stringstream var_name; - - var_name << "_" << _id_space << "_" << name; - - return var_name.str(); -} - -TileOperand &KernelWriter::declare_tile_operand(std::unique_ptr<TileOperand> operand_ptr) -{ - auto &operand = _kernel->register_operand(std::move(operand_ptr)); - const auto &name = operand.name(); - - if (!operand.is_constant()) - { - const auto &info = operand.tile_info(); - - _impl->declare_tile(name, prototype::TileInfo(info.data_type(), info.width(), info.height())); - } - else - { - _impl->declare_const_tile(name, operand.value(), operand.data_type()); - } - - return operand; -} - -// ================================================================================================= -// Load and store -// ================================================================================================= - -void KernelWriter::op_load(TileOperand &tile, - const TensorOperand &tensor, - const TensorTileSampler &sampler, - const TileOperand &dilation_y) -{ - prototype::TensorOperand impl_tensor( - tensor.name(), - prototype::GpuSampler{sampler.format(), prototype::to_gpu_tensor_storage(tensor.storage_type()), - sampler.address_mode_x(), sampler.address_mode_y(), sampler.address_mode_z()}); - - auto impl_x = sampler.x().create_impl_operand(_impl.get()); - auto impl_y = sampler.y().create_impl_operand(_impl.get()); - auto impl_z = sampler.z().create_impl_operand(_impl.get()); - auto impl_b = sampler.b().create_impl_operand(_impl.get()); - - auto impl_dilation_y = dilation_y.create_impl_operand(_impl.get()); - - auto impl_dst = tile.create_impl_operand(_impl.get()); - - _impl->op_load_immediate(impl_tensor, impl_dst, impl_x, impl_y, impl_z, impl_b, impl_dilation_y); -} - -void KernelWriter::op_load_indirect(TileOperand &tile, const TensorOperand &tensor, const TensorTileSampler &sampler) -{ - prototype::TensorOperand impl_tensor( - tensor.name(), - prototype::GpuSampler{sampler.format(), prototype::to_gpu_tensor_storage(tensor.storage_type()), - sampler.address_mode_x(), sampler.address_mode_y(), sampler.address_mode_z()}); - - auto impl_x = sampler.x().create_impl_operand(_impl.get()); - auto impl_y = sampler.y().create_impl_operand(_impl.get()); - auto impl_z = sampler.z().create_impl_operand(_impl.get()); - auto impl_b = sampler.b().create_impl_operand(_impl.get()); - - auto impl_dst = tile.create_impl_operand(_impl.get()); - - _impl->op_load_indirect(impl_tensor, impl_dst, impl_x, impl_y, impl_z, impl_b); -} - -void KernelWriter::util_get_indirect_buffer(TileOperand &tile, - const TensorOperand &tensor, - const TensorTileSampler &sampler, - const TileOperand &x, - const TileOperand &y, - const TileOperand &x_off, - const TileOperand &y_off) -{ - prototype::TensorOperand impl_tensor( - tensor.name(), - prototype::GpuSampler{sampler.format(), prototype::to_gpu_tensor_storage(tensor.storage_type()), - sampler.address_mode_x(), sampler.address_mode_y(), sampler.address_mode_z()}); - - auto impl_x = x.create_impl_operand(_impl.get()); - auto impl_y = y.create_impl_operand(_impl.get()); - auto impl_x_off = x_off.create_impl_operand(_impl.get()); - auto impl_y_off = y_off.create_impl_operand(_impl.get()); - - auto impl_dst = tile.create_impl_operand(_impl.get()); - - _impl->util_get_indirect_buffer(impl_dst, impl_tensor, impl_x, impl_y, impl_x_off, impl_y_off); -} - -void KernelWriter::op_store(TensorOperand &tensor, const TileOperand &tile, const TensorTileSampler &sampler) -{ - prototype::TensorOperand impl_tensor( - tensor.name(), - prototype::GpuSampler{sampler.format(), prototype::to_gpu_tensor_storage(tensor.storage_type()), - sampler.address_mode_x(), sampler.address_mode_y(), sampler.address_mode_z()}); - auto impl_src = tile.create_impl_operand(_impl.get()); - auto impl_x = sampler.x().create_impl_operand(_impl.get()); - auto impl_y = sampler.y().create_impl_operand(_impl.get()); - auto impl_z = sampler.z().create_impl_operand(_impl.get()); - auto impl_b = sampler.b().create_impl_operand(_impl.get()); - - _impl->op_store_immediate(impl_tensor, impl_src, impl_x, impl_y, impl_z, impl_b); -} - -// ================================================================================================= -// Data processing -// ================================================================================================= - -void KernelWriter::op_assign(const TileOperand &dst, const TileOperand &src) -{ - auto impl_dst = dst.create_impl_operand(_impl.get()); - auto impl_src = src.create_impl_operand(_impl.get()); - - _impl->op_assign(impl_dst, impl_src); -} - -void KernelWriter::op_cast_expression(const TileOperand &dst, const TileOperand &src, const ConvertPolicy policy) -{ - auto impl_dst = dst.create_impl_operand(_impl.get()); - auto impl_src = src.create_impl_operand(_impl.get()); - - _impl->op_cast_expression(impl_dst, impl_src, policy); -} - -void KernelWriter::op_binary_expression(const TileOperand &dst, - const TileOperand &lhs, - BinaryOp op, - const TileOperand &rhs) -{ - auto impl_lhs = lhs.create_impl_operand(_impl.get()); - auto impl_rhs = rhs.create_impl_operand(_impl.get()); - auto impl_dst = dst.create_impl_operand(_impl.get()); - - _impl->op_binary_expression(impl_dst, impl_lhs, op, impl_rhs); -} - -void KernelWriter::op_unary_expression(const TileOperand &dst, UnaryOp op, const TileOperand &src) -{ - auto impl_dst = dst.create_impl_operand(_impl.get()); - auto impl_src = src.create_impl_operand(_impl.get()); - - _impl->op_unary_expression(impl_dst, op, impl_src); -} - -void KernelWriter::op_unary_elementwise_function(const TileOperand &dst, UnaryFunction opcode, const TileOperand &src) -{ - auto impl_dst = dst.create_impl_operand(_impl.get()); - auto impl_src = src.create_impl_operand(_impl.get()); - - _impl->op_unary_elementwise_function(impl_dst, opcode, impl_src); -} - -void KernelWriter::op_binary_elementwise_function(const TileOperand &dst, - BinaryFunction opcode, - const TileOperand &first, - const TileOperand &second) -{ - auto impl_dst = dst.create_impl_operand(_impl.get()); - auto impl_first = first.create_impl_operand(_impl.get()); - auto impl_second = second.create_impl_operand(_impl.get()); - - _impl->op_binary_elementwise_function(impl_dst, opcode, impl_first, impl_second); -} - -void KernelWriter::op_ternary_elementwise_function(const TileOperand &dst, - TernaryFunction opcode, - const TileOperand &first, - const TileOperand &second, - const TileOperand &third) -{ - auto impl_dst = dst.create_impl_operand(_impl.get()); - auto impl_first = first.create_impl_operand(_impl.get()); - auto impl_second = second.create_impl_operand(_impl.get()); - auto impl_third = third.create_impl_operand(_impl.get()); - - _impl->op_ternary_elementwise_function(impl_dst, opcode, impl_first, impl_second, impl_third); -} - -void KernelWriter::op_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body) -{ - auto impl_lhs = lhs.create_impl_operand(_impl.get()); - auto impl_rhs = rhs.create_impl_operand(_impl.get()); - - _impl->op_if_header(impl_lhs, op, impl_rhs); - _impl->compound_statement_begin(); - body(); - _impl->compound_statement_end(); -} - -void KernelWriter::op_else_if(const TileOperand &lhs, - BinaryOp op, - const TileOperand &rhs, - const std::function<void()> &body) -{ - auto impl_lhs = lhs.create_impl_operand(_impl.get()); - auto impl_rhs = rhs.create_impl_operand(_impl.get()); - - _impl->op_else_if_header(impl_lhs, op, impl_rhs); - _impl->compound_statement_begin(); - body(); - _impl->compound_statement_end(); -} - -void KernelWriter::op_else(const std::function<void()> &body) -{ - _impl->op_else_header(); - _impl->compound_statement_begin(); - body(); - _impl->compound_statement_end(); -} - -void KernelWriter::op_for_loop(const TileOperand &var_name, - BinaryOp cond_op, - const TileOperand &cond_value_name, - const TileOperand &update_var_name, - AssignmentOp update_op, - const TileOperand &update_value_name, - const std::function<void()> &body) -{ - auto impl_var_name = var_name.create_impl_operand(_impl.get()); - auto impl_cond_value_name = cond_value_name.create_impl_operand(_impl.get()); - auto impl_update_var_name = update_var_name.create_impl_operand(_impl.get()); - auto impl_update_value_name = update_value_name.create_impl_operand(_impl.get()); - - _impl->op_for_loop_header(impl_var_name, cond_op, impl_cond_value_name, impl_update_var_name, update_op, - impl_update_value_name); - _impl->compound_statement_begin(); - body(); - _impl->compound_statement_end(); -} - -// ================================================================================================= -// Misc -// ================================================================================================= - -void KernelWriter::op_get_global_id(const TileOperand &dst, int32_t dim) -{ - _impl->op_get_global_id(prototype::Operand(dst.name()), dim); -} - -void KernelWriter::op_return() -{ - _impl->op_return(); -} - -// ================================================================================================= -// Code generation -// ================================================================================================= - -std::string KernelWriter::generate_code() -{ - return prototype::generate_code(*_kernel->impl(), _kernel->name()); -} - -} // namespace ckw diff --git a/compute_kernel_writer/prototype/src/OperandBase.cpp b/compute_kernel_writer/prototype/src/OperandBase.cpp deleted file mode 100644 index e0617fdc06..0000000000 --- a/compute_kernel_writer/prototype/src/OperandBase.cpp +++ /dev/null @@ -1,49 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "ckw/OperandBase.h" - -namespace ckw -{ - -OperandBase::OperandBase(const std::string &name) : _name(name) -{ -} - -OperandBase::~OperandBase() -{ -} - -const std::string &OperandBase::name() const -{ - return _name; -} - -OperandBase &OperandBase::name(const std::string &name) -{ - _name = name; - return *this; -} - -} // namespace ckw diff --git a/compute_kernel_writer/prototype/src/Prototype.h b/compute_kernel_writer/prototype/src/Prototype.h deleted file mode 100644 index b392fe2651..0000000000 --- a/compute_kernel_writer/prototype/src/Prototype.h +++ /dev/null @@ -1,4189 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#ifndef CKW_PROTOTYPE_SRC_PROTOTYPE_H -#define CKW_PROTOTYPE_SRC_PROTOTYPE_H - -#include "ckw/Error.h" -#include "ckw/TensorInfo.h" -#include "ckw/types/ConvertPolicy.h" -#include "ckw/types/DataType.h" -#include "ckw/types/Functions.h" -#include "ckw/types/GpuTargetLanguage.h" -#include "ckw/types/Operators.h" -#include "ckw/types/TensorSamplerTypes.h" - -#include <algorithm> -#include <array> -#include <cassert> // assert (to be removed) -#include <chrono> -#include <cmath> -#include <cstdint> // int32_t -#include <functional> -#include <iostream> // cout (to be removed) -#include <map> -#include <memory> -#include <stdexcept> -#include <string> -#include <unordered_map> -#include <vector> - -namespace ckw -{ -namespace prototype -{ - -// Dummy data structure for Size2D -using Size2D = std::vector<int32_t>; - -// Dummy Status -using Status = void; - -enum class ComponentType : int32_t -{ - Complex = 0, - Simple = 1, - Unfusable = 2 -}; - -enum class GpuCompilationSpeed -{ - Fast = 0x00, // fast compilation may increase the latency of the network - Slow = 0x01 // slow compilation may decrease the latency of the network -}; - -enum class GpuExtensions -{ - Fp16, - Dot8, - Mmul, - FastMath -}; - -struct TensorInfo -{ - TensorShape shape{{0}}; - DataType data_type{DataType::Unknown}; - TensorDataLayout data_layout{TensorDataLayout::Nhwc}; - int32_t id{-1}; -}; - -struct ComponentAttribute -{ - GpuCompilationSpeed compilation_speed{GpuCompilationSpeed::Fast}; - bool overwrite_tile{true}; -}; - -inline std::string data_type_to_cl_type(DataType dt) -{ - switch (dt) - { - case DataType::Fp32: - return "float"; - case DataType::Fp16: - return "half"; - case DataType::Int8: - return "char"; - case DataType::Uint8: - return "uchar"; - case DataType::Uint16: - return "ushort"; - case DataType::Int16: - return "short"; - case DataType::Uint32: - return "uint"; - case DataType::Int32: - return "int"; - case DataType::Bool: - return "bool"; - default: - assert(false); - return ""; - } -} - -inline int32_t width_to_cl_vector_size(int32_t width) -{ - switch (width) - { - case 1: - return 1; - case 2: - return 2; - case 3: - return 3; - case 4: - return 4; - case 5: - case 6: - case 7: - case 8: - return 8; - case 9: - case 10: - case 11: - case 12: - case 13: - case 14: - case 15: - case 16: - return 16; - default: - assert(false); - return 0; - } -} - -inline std::string get_cl_data_type(DataType dt, int32_t width) -{ - std::string data_type; - int32_t w = width_to_cl_vector_size(width); - data_type += data_type_to_cl_type(dt); - if (w != 1) - { - data_type += std::to_string(w); - } - return data_type; -} - -inline std::string to_opencl_store(int32_t vector_length) -{ - if (vector_length != 1) - { - return "vstore" + std::to_string(vector_length) + "("; - } - else - { - return "*("; - } -} - -struct TileInfo -{ - TileInfo() - { - } - - TileInfo(DataType dt) : dt(dt), w(1), h(1) - { - } - - TileInfo(DataType dt, int32_t width) : dt(dt), w(width), h(1) - { - } - - TileInfo(DataType dt, int32_t width, int32_t height) : dt(dt), w(width), h(height) - { - } - - DataType dt{DataType::Unknown}; // Data type of the tile - int32_t w{0}; // Width (i.e. c0 - portion of the channels) - int32_t h{0}; // Height (i.e. s0 - portion of the spatial dimensions) -}; - -inline std::ostream &operator<<(std::ostream &o, const TileInfo &a) -{ - o << a.w << " x " << a.h; - return o; -} - -struct DataTypeAsString -{ - std::string str{""}; - DataType dt{DataType::Unknown}; - int32_t size{1}; -}; - -struct ValueAsString -{ - std::string str{""}; - DataTypeAsString type{}; -}; - -// https://stackoverflow.com/questions/51515378/storing-and-accessing-tile-properties-in-c -// A Tile is a collection of variables used to express a 2D data. -class IScalarTile -{ -public: - virtual ~IScalarTile() = default; - - /** Method to get the scalar variable from a tile - * @param[in] x X coordinate on the width of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge - * @param[in] y Y coordinate on the height of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge - * - * @return the scalar variable as a string - */ - virtual ValueAsString scalar(int32_t x, int32_t y) const = 0; - - /** Method to get the list of underlying variable names used by the tile - * - * @return the list of variable names - */ - virtual std::vector<ValueAsString> underlying_source_variables() const = 0; - - /** Method to get the name of the tile. - * - * @return the name of the tile - */ - std::string name() const - { - return _basename; - } - - /** Method to get the tile format - * - * @return the format - */ - TileInfo format() const - { - return _format; - } - - /** Method to know whether the tile is assignable or not (constant) - * - * @return true if the tile is assignable - */ - virtual bool is_assignable() const = 0; - - /** Method to know whether the tile needs to be declared - * - * @return true if the tile needs to be declared in the code before being used - */ - virtual bool need_declaration() const = 0; - -protected: - TileInfo _format{}; // Tile format - std::string _basename{""}; // Tile name -}; - -// A tile is a collection of variables used to express a 2D data. The variables are vectors in the GPU context. -// The vector size is given by the width of the tile. The number of vectors height by depth defines the number of vectors -class IVectorTile : public IScalarTile -{ -public: - virtual ~IVectorTile() = default; - - /** Method to get the vector variable from a tile. A vector is an ordered homogeneous collection of two or more scalars. - * The user can query the list of supported width for the vectors through preferred_vector_sizes(). - * - * @param[in] y Y coordinate on the height of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge - * - * @return the vector variable as a string - */ - virtual ValueAsString vector(int32_t y) const = 0; - - /** Method to get a vector variable from a tile. A vector is an ordered homogeneous collection of two or more scalars. - * - * @return the vector variable as a string - */ - virtual ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const = 0; - /** Method to get the preferred vector sizes. - * - * @return a vector with the preferred vector sizes - */ - //virtual std::vector<int32_t> preferred_vector_sizes() const = 0; -}; - -class ClTile : public IVectorTile -{ -public: - ClTile(const std::string &name, TileInfo format) - { - _format = format; - _basename = name; - } - - ValueAsString scalar(int32_t x, int32_t y) const override - { - x = std::max(std::min(x, _format.w - 1), static_cast<int32_t>(0)); - y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0)); - - ValueAsString t; - t.str = build_variable_name(y); - t.type.str = get_cl_data_type(_format.dt, 1); - t.type.dt = _format.dt; - t.type.size = 1; - - // Check required because if the width has only one element, we cannot use .s0 - if (_format.w != 1) - { - // Automatic broadcasting - t.str += ".s" + std::to_string(x); - } - - return t; - } - - ValueAsString vector(int32_t y) const override - { - y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0)); - - ValueAsString t; - t.str = build_variable_name(y); - t.type.str = get_cl_data_type(_format.dt, _format.w); - t.type.dt = _format.dt; - t.type.size = _format.w; - return t; - } - - ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const override - { - y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0)); - - ValueAsString t; - t.str = build_variable_name(y); - t.type.str = get_cl_data_type(_format.dt, width); - t.type.dt = _format.dt; - t.type.size = width; - - if (_format.w != 1) - { - t.str += ".s"; - for (int i = 0; i < width; ++i) - { - t.str += to_scalar_hex(x_start + i); - } - } - return t; - } - - std::vector<ValueAsString> underlying_source_variables() const override - { - std::vector<ValueAsString> vars; - for (int32_t y = 0; y < _format.h; ++y) - { - ValueAsString t; - t.str = build_variable_name(y); - t.type.str = get_cl_data_type(_format.dt, _format.w); - t.type.dt = _format.dt; - t.type.size = _format.w; - vars.push_back(t); - } - return vars; - } - - bool is_assignable() const override - { - return true; - } - - bool need_declaration() const override - { - return true; - } - -private: - std::string build_variable_name(int32_t y) const - { - std::string var_name = _basename; - - if (_format.h == 1) - { - return var_name; - } - else - { - var_name += "_"; - var_name += std::to_string(y); - } - - return var_name; - } - - std::string to_scalar_hex(int32_t x) const - { - switch (x) - { - case 0: - case 1: - case 2: - case 3: - case 4: - case 5: - case 6: - case 7: - case 8: - case 9: - return std::to_string(x); - case 10: - return "A"; - case 11: - return "B"; - case 12: - return "C"; - case 13: - return "D"; - case 14: - return "E"; - case 15: - return "F"; - default: - std::cout << "Unsupported hexadecimal value" << std::endl; - assert(false); - return ""; - } - } -}; - -// Unique features: It contains values in the form of string. The name used for this object is misleading since the variables can change the value over time. -class ClConstantTile : public IVectorTile -{ -public: - ClConstantTile(const std::vector<std::vector<std::string>> &in, DataType dt) - { - _format.w = in[0].size(); - _format.h = in.size(); - _format.dt = dt; - - _data = std::vector<std::vector<std::string>>(_format.h, std::vector<std::string>(_format.w)); - - for (int32_t y = 0; y < _format.h; ++y) - { - for (int32_t x = 0; x < _format.w; ++x) - { - _data[y][x] = in[y][x]; - } - } - } - - ValueAsString scalar(int32_t x, int32_t y) const override - { - x = std::max(std::min(x, _format.w - 1), static_cast<int32_t>(0)); - y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0)); - - ValueAsString t; - t.str = _data[y][x]; - t.type.str = get_cl_data_type(_format.dt, 1); - t.type.dt = _format.dt; - t.type.size = 1; - - return t; - } - - ValueAsString vector(int32_t y) const override - { - y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0)); - - return vector(0, _format.w, y); - } - - ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const override - { - y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0)); - - ValueAsString t; - t.str = ""; - t.type.str = get_cl_data_type(_format.dt, width); - t.type.dt = _format.dt; - t.type.size = width; - - if (width > 1) - { - t.str += "((" + get_cl_data_type(_format.dt, width) + ")("; - } - - int32_t x = x_start; - for (; x < width - 1; ++x) - { - t.str += scalar(x, y).str; - t.str += ", "; - } - t.str += scalar(x, y).str; - - if (width > 1) - { - t.str += "))"; - } - - return t; - } - - std::vector<ValueAsString> underlying_source_variables() const override - { - std::vector<ValueAsString> vars; - - for (int32_t y = 0; y < _format.h; ++y) - { - for (int32_t x = 0; x < _format.w; ++x) - { - ValueAsString t; - t.str = _data[y][x]; - t.type.str = get_cl_data_type(_format.dt, 1); - t.type.dt = _format.dt; - t.type.size = 1; - vars.push_back(t); - } - } - - return vars; - } - - bool is_assignable() const override - { - return false; - } - - bool need_declaration() const override - { - return false; - } - -private: - std::vector<std::vector<std::string>> _data{}; -}; - -enum class TensorComponentIndex : int32_t -{ - IndexMask = 0x0000000f, -}; - -enum class TensorComponentGroup : int32_t -{ - OffsetFirstElement = 0x00000100, - Stride = 0x00001000, - Dimension = 0x00010000, - FoldedDimension = 0x00100000, - Constant = 0x01000000 -}; - -inline std::string to_string(TensorComponentType x) -{ - switch (x) - { - case TensorComponentType::Unknown: - return "Unknown"; - case TensorComponentType::OffsetFirstElement: - return "OffsetFirstElement"; - case TensorComponentType::Stride1: - return "Stride1"; - case TensorComponentType::Stride2: - return "Stride2"; - case TensorComponentType::Stride3: - return "Stride3"; - case TensorComponentType::Stride4: - return "Stride4"; - case TensorComponentType::Dim0: - return "Dim0"; - case TensorComponentType::Dim1: - return "Dim1"; - case TensorComponentType::Dim2: - return "Dim2"; - case TensorComponentType::Dim3: - return "Dim3"; - case TensorComponentType::Dim4: - return "Dim4"; - case TensorComponentType::Dim1xDim2: - return "Dim1xDim2"; - case TensorComponentType::Dim1xDim2xDim3: - return "Dim1xDim2xDim3"; - default: - assert(false); - return ""; - } -} - -class ITensorArgument -{ -public: - virtual ~ITensorArgument() = default; - - /** Method to get the tensor component as a string - * - * @param[in] x tensor component to query - * - * @return the tensor component as a string - */ - virtual std::string component(TensorComponentType x) = 0; - - /** Method to get the tensor component type declaration as a string - * - * @return the tensor component type declaration as a string - */ - virtual std::string component_type_declaration() const = 0; - - /** Method to get the tensor component data type - * - * @return the tensor component data type - */ - virtual DataType component_data_type() const = 0; - - /** Method to get the tensor component declarations - * - * @return a vector containing the tensor component declarations - */ - virtual std::vector<TensorComponentType> component_declarations() const = 0; - - /** Method to get the name of the tensor argument. - * - * @return the name of the tensor argument - */ - std::string name() const - { - return _basename; - } - - /** Method to get the tensor format - * - * @return the format - */ - TensorInfo format() const - { - return _format; - } - -protected: - TensorInfo _format{}; - std::string _basename{}; -}; - -enum class GpuTensorStorage : int32_t -{ - Unknown = 0x0000, - BufferUint8Ptr = 0x0012, - Image2dReadOnly = 0x0020, - Image2dWriteOnly = 0x0021, - Image3dReadOnly = 0x0030, - Image3dWriteOnly = 0x0031 -}; - -inline GpuTensorStorage to_gpu_tensor_storage(TensorStorageType s) -{ - switch (s) - { - case TensorStorageType::Unknown: - return GpuTensorStorage::Unknown; - - case TensorStorageType::BufferUint8Ptr: - return GpuTensorStorage::BufferUint8Ptr; - - case TensorStorageType::Texture2dReadOnly: - return GpuTensorStorage::Image2dReadOnly; - - case TensorStorageType::Texture2dWriteOnly: - return GpuTensorStorage::Image2dWriteOnly; - - default: - assert(false); - return GpuTensorStorage::Unknown; - } -} - -inline TensorStorageType to_tensor_storage(GpuTensorStorage s) -{ - switch (s) - { - case GpuTensorStorage::Unknown: - return TensorStorageType::Unknown; - - case GpuTensorStorage::BufferUint8Ptr: - return TensorStorageType::BufferUint8Ptr; - - case GpuTensorStorage::Image2dReadOnly: - return TensorStorageType::Texture2dReadOnly; - - case GpuTensorStorage::Image2dWriteOnly: - return TensorStorageType::Texture2dWriteOnly; - - default: - assert(false); - return TensorStorageType::Unknown; - } -} - -class IGpuTensorArgument : public ITensorArgument -{ -public: - virtual ~IGpuTensorArgument() = default; - - /** Method to get the tensor storage, which is the underlying storage used to keep the data memory - * - * @param[in] x tensor storage to query - * - * @return the tensor storage as a string - */ - virtual std::string storage(GpuTensorStorage x) = 0; - - /** Method to get the tensor storage type declaration as a string - * - * @param[in] x tensor component to query - * - * @return the tensor storage type declaration as a string - */ - virtual std::string storage_type_declaration(GpuTensorStorage x) const = 0; - - /** Method to get the tensor storage declarations - * - * @return a vector containing the tensor storage declarations - */ - virtual std::vector<GpuTensorStorage> storage_declarations() const = 0; -}; - -class ClTensorArgument : public IGpuTensorArgument -{ -public: - ClTensorArgument(const std::string &name, const TensorInfo &x, bool return_by_value_when_possible) - { - _basename = name; - _format = x; - _return_by_value_when_possible = return_by_value_when_possible; - } - - // Methods to override - std::string component(TensorComponentType x) override - { - if ((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentGroup::Constant))) - { - int32_t idx = static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentIndex::IndexMask); - return std::to_string(idx - 1); - } - - if (_return_by_value_when_possible) - { - if ((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentGroup::Dimension))) - { - int32_t idx = static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentIndex::IndexMask); - return std::to_string(_format.shape[idx]); - } - - if ((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentGroup::FoldedDimension))) - { - switch (x) - { - case TensorComponentType::Dim1xDim2: - return std::to_string(_format.shape[1] * _format.shape[2]); - case TensorComponentType::Dim1xDim2xDim3: - return std::to_string(_format.shape[1] * _format.shape[2] * _format.shape[2]); - default: - std::cout << "Unsupported folded dimension" << std::endl; - assert(false); - } - } - } - - if (std::find(_components_required.begin(), _components_required.end(), x) == _components_required.end()) - { - _components_required.push_back(x); - } - - return build_component_name(x); - } - - std::string component_type_declaration() const override - { - return "int"; - }; - - DataType component_data_type() const override - { - return DataType::Int32; - } - - std::string storage(GpuTensorStorage x) override - { - if (std::find(_storage_required.begin(), _storage_required.end(), x) == _storage_required.end()) - { - _storage_required.push_back(x); - } - - return build_storage_name(x); - } - - std::string storage_type_declaration(GpuTensorStorage x) const override - { - switch (x) - { - case GpuTensorStorage::BufferUint8Ptr: - return "__global uchar*"; - case GpuTensorStorage::Image2dReadOnly: - return "__read_only image2d_t"; - case GpuTensorStorage::Image2dWriteOnly: - return "__write_only image2d_t"; - case GpuTensorStorage::Image3dReadOnly: - return "__read_only image3d_t "; - case GpuTensorStorage::Image3dWriteOnly: - return "__write_only image3d_t "; - default: - std::cout << "Unsupported storage" << std::endl; - assert(false); - return ""; - } - }; - - std::vector<GpuTensorStorage> storage_declarations() const override - { - return _storage_required; - } - - std::vector<TensorComponentType> component_declarations() const override - { - return _components_required; - } - -private: - std::string build_storage_name(GpuTensorStorage x) const - { - std::string var_name = _basename; - - switch (x) - { - case GpuTensorStorage::BufferUint8Ptr: - return var_name + "_ptr"; - case GpuTensorStorage::Image2dReadOnly: - case GpuTensorStorage::Image2dWriteOnly: - return var_name + "_img2d"; - case GpuTensorStorage::Image3dReadOnly: - case GpuTensorStorage::Image3dWriteOnly: - return var_name + "_img3d"; - default: - std::cout << "Unsupported storage" << std::endl; - assert(false); - } - - return var_name; - } - - std::string build_component_name(TensorComponentType x) const - { - std::string var_name = _basename; - - switch (x) - { - case TensorComponentType::OffsetFirstElement: - return var_name + "_offset_first_element"; - case TensorComponentType::Stride1: - return var_name + "_stride1"; - case TensorComponentType::Stride2: - return var_name + "_stride2"; - case TensorComponentType::Stride3: - return var_name + "_stride3"; - case TensorComponentType::Dim0: - return var_name + "_dim0"; - case TensorComponentType::Dim1: - return var_name + "_dim1"; - case TensorComponentType::Dim2: - return var_name + "_dim2"; - case TensorComponentType::Dim3: - return var_name + "_dim3"; - case TensorComponentType::Dim1xDim2: - return var_name + "_dim1xdim2"; - case TensorComponentType::Dim1xDim2xDim3: - return var_name + "_dim1xdim2xdim3"; - default: - std::cout << "Unsupported component" << std::endl; - assert(false); - } - - return var_name; - } - - bool _return_by_value_when_possible{false}; - std::vector<GpuTensorStorage> _storage_required{}; - std::vector<TensorComponentType> _components_required{}; -}; - -/** - * @brief Data structure that contains the declared tiles by the components. - * The registry is a linear data structure that follows the similar principle of the stack. The user can use the @p increment_registry_level() method to - * increase the level of the stack (0 when it starts). When the user uses the @p decrement_registry_level() method, the registry decreases the level of the stack - * and remove (pop) all the tiles from the level above. - * When a tile is declared on the level 0, it is a global tile. A global tile is visible in all parts of the code. - * Since different components may use the same name to define a tile, the registry adopts the IdSpace concept, an @p id to prevent name collisions - * when declaring tiles among different components. - * - */ -class GpuTileRegistry -{ -public: - enum class RegistryTileType - { - Tile, - Link - }; - - using RegistryIdSpace = int32_t; - using RegistryLevel = int32_t; - using RegistryTileName = std::string; - - struct RegistryTileTableEntry - { - RegistryLevel registry_level{0}; - std::unique_ptr<IVectorTile> tile_object{nullptr}; - }; - - struct RegistryTileTypeTableEntry - { - RegistryTileType tile_type{RegistryTileType::Tile}; - RegistryTileName tile_name{}; - RegistryIdSpace registry_idspace{0}; - RegistryLevel registry_level{0}; - }; - - using RegistryTileTable = std::map<RegistryIdSpace, std::map<RegistryTileName, RegistryTileTableEntry>>; - using RegistryTileTypeTable = std::map<RegistryIdSpace, std::map<RegistryTileName, RegistryTileTypeTableEntry>>; - - /** - * @brief Construct a new Gpu Tile Registry object - * - */ - GpuTileRegistry() - { - _language = GpuTargetLanguage::Unknown; - } - - /** - * @brief Construct a new Gpu Tile Registry object providing the Gpu programming language - * - * @param[in] language Gpu programming language to use - */ - GpuTileRegistry(GpuTargetLanguage language) - { - _language = language; - } - - /** - * @brief Default destructor. Destroy the Gpu Tile Registry object - * - */ - ~GpuTileRegistry() = default; - - /** - * @brief Set the working IdSpace for the tile registry. IdSpace is used to prevent name collisions when declaring tiles. - * Therefore, the IdSpace should be set before declaring any tiles. - * - * @param[in] id The IdSpace id - */ - void set_IdSpace(int32_t id) - { - _IdSpace = id; - } - - /** - * @brief Get the current working IdSpace for the tile registry. IdSpace is used to prevent name collisions when declaring tiles - * - * @return The IdSpace id - */ - int32_t IdSpace() const - { - return _IdSpace; - } - - /** - * @brief Gets all the IdSpace declarations defined in the tile registry. - * - * @return all the IdSpace declarations defined in the tile registry as std::vector<int32_t>. It returns an empty vector if there are no IdSpace declarations. - */ - std::vector<int32_t> IdSpace_declarations() const - { - std::vector<int32_t> x; - - auto it = _frags.begin(); - - while (it != _frags.end()) - { - x.push_back(it->first); - - it++; - } - - return x; - } - - /** - * @brief Declare a tile from a previously created tile - */ - void insert(const std::string &name, const IVectorTile *frag) - { - assert(_language == GpuTargetLanguage::OpenCL); - const int32_t key_IdSpace = _IdSpace; - const std::string key_var_name = name; - const std::string var_name = frag->name(); - TileInfo format = frag->format(); - - // First check whether a tile with the same name exists - IVectorTile *result = (*this)[key_var_name]; - assert(result == nullptr); - if (result == nullptr) - { - std::unique_ptr<ClTile> tile = std::make_unique<ClTile>(var_name, format); - - _frags[key_IdSpace][key_var_name].tile_object = std::move(tile); - _frags[key_IdSpace][key_var_name].registry_level = _registry_level; - - _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Link; - _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name; - _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace; - _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level; - } - } - - /** - * @brief Declare a tile with TileInfo. The tile will be stored in the IdSpace set with @p set_IdSpace() - * - * @note The reference name used for declaring the tile should not be previously used in the IdSpace - * - * @param[in] name Reference name for the tile. The reference name can be used to retrieve the tile stored in the registry. - * @param[in] format Tile format use to use - */ - void insert(const std::string &name, const TileInfo &format) - { - assert(_language == GpuTargetLanguage::OpenCL); - const int32_t key_IdSpace = _IdSpace; - const std::string key_var_name = name; - const std::string var_name = generate_tile_name(name); - - // First check whether a tile with the same name exists - IVectorTile *result = (*this)[key_var_name]; - assert(result == nullptr); - if (result == nullptr) - { - std::unique_ptr<ClTile> tile = std::make_unique<ClTile>(var_name, format); - _frags[key_IdSpace][key_var_name].tile_object = std::move(tile); - _frags[key_IdSpace][key_var_name].registry_level = _registry_level; - - _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile; - _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name; - _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace; - _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level; - } - } - - /** - * @brief Declare a constant tile. The content of the tile is passed as a vector of std::string - * - * @note The reference name used for declaring the tile should not be previously used in the IdSpace - * - * @param[in] name Reference name for the tile. The reference name can be used to retrieve the tile stored in the registry. - * @param[in] in A 3D std::vector of std::string. From the 3D std::vector we can know the dimensions for the tile - * @param[in] dt The data type for the elements stored in the 3D std::vector as std::string. It is user's responsibilty to ensure - * that the data type is aligned with the content of the std::string. - */ - void insert(const std::string &name, const std::vector<std::vector<std::string>> &in, DataType dt) - { - assert(_language == GpuTargetLanguage::OpenCL); - const int32_t key_IdSpace = _IdSpace; - const std::string key_var_name = name; - - // First check whether a tile with the same name exists - IVectorTile *result = (*this)[key_var_name]; - assert(result == nullptr); - if (result == nullptr) - { - std::unique_ptr<ClConstantTile> tile = std::make_unique<ClConstantTile>(in, dt); - _frags[key_IdSpace][key_var_name].tile_object = std::move(tile); - _frags[key_IdSpace][key_var_name].registry_level = _registry_level; - - _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile; - _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name; - _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace; - _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level; - } - } - - /** - * @brief Declare an anonymous constant tile. The content of the tile is passed as a vector of std::string - * - * @note This method can be used to declare temporary tiles that need to be accessed only once. - * - * @param[in] in A 3D std::vector of std::string. From the 3D std::vector we can know the dimensions for the tile - * @param[in] dt The data type for the elements stored in the 3D std::vector as std::string. It is user responsibilty to ensure - * that the data type is aligned with what passed with the std::string. - * - * @return IVectorTile* the anonymous constant tile - */ - IVectorTile *insert(const std::vector<std::vector<std::string>> &in, DataType dt) - { - assert(_language == GpuTargetLanguage::OpenCL); - const int32_t key_IdSpace = _IdSpace; - const std::string key_var_name = "_" + std::to_string(_anonymous_frag_count++); - - // First check whether a tile with the same name exists - IVectorTile *result = (*this)[key_var_name]; - assert(result == nullptr); - if (result == nullptr) - { - std::unique_ptr<ClConstantTile> tile = std::make_unique<ClConstantTile>(in, dt); - _frags[key_IdSpace][key_var_name].tile_object = std::move(tile); - _frags[key_IdSpace][key_var_name].registry_level = _registry_level; - - _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile; - _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name; - _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace; - _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level; - } - - return (*this)[key_var_name]; - } - - /** - * @brief Get the tile from the registry. This method searches the tile in the IdSpace provided by the user - * - * @param[in] name The name of the tile to retrieve - * @param[in] IdSpace The IdSpace id where to search the tile - * - * @return IVectorTile* The tile - */ - IVectorTile *get(const std::string &name, int32_t IdSpace) - { - const int32_t key_IdSpace = IdSpace; - const std::string key_var_name = name; - - IVectorTile *result = nullptr; - auto search_IdSpace = _frags.find(key_IdSpace); - if (search_IdSpace != _frags.end()) - { - auto search_tile = _frags[key_IdSpace].find(key_var_name); - if (search_tile != _frags[key_IdSpace].end()) - { - result = search_tile->second.tile_object.get(); - assert(result != nullptr); - } - } - - return result; - } - - /** - * @brief Get the tile from the registry. This method searches the tile in the IdSpace set with @p set_IdSpace() - * - * @param[in] name The name of the tile to retrieve - * - * @return IVectorTile* The tile - */ - IVectorTile *operator[](const std::string &name) - { - return get(name, _IdSpace); - } - - /** - * @brief Check whether the tile in the in the IdSpace provided by the user exists - * - * @param[in] name Name of the tile to search for - * @param[in] IdSpace The IdSpace id where to search the tile - * - * @return true if the tile exists - * @return false if the tile does not exist - */ - bool has_tile(const std::string &name, int32_t IdSpace) const - { - const int32_t key_IdSpace = IdSpace; - const std::string key_var_name = name; - - // IVectorTile* result = nullptr; - auto search_IdSpace = _frags.find(key_IdSpace); - - return search_IdSpace != _frags.end(); - } - - /** - * @brief Check whether the tile within the current IdSpace exists - * - * @param[in] name Name of the tile to search for - * - * @return true if the tile exists - * @return false if the tile does not exist - */ - bool has_tile(const std::string &name) const - { - return has_tile(name, _IdSpace); - } - - /** - * @brief Get all the tiles declared within the IdSpace provided by the user - * - * @param[in] IdSpace IdSpace where to retrieve all the declared tiles - * - * @return std::vector<IVectorTile*> A vector with all the declared tiles in the IdSpace provided by the user - */ - std::vector<IVectorTile *> tile_declarations(int32_t IdSpace) - { - std::vector<IVectorTile *> tiles; - - std::map<RegistryTileName, RegistryTileTypeTableEntry>::iterator it = _frag_types[IdSpace].begin(); - - while (it != _frag_types[IdSpace].end()) - { - // The following line should be enabled. However, we cannot at this stage - // because it used to retrieve the output tile produced by each component. - // However, this method should NOT be used to retrieve the output tile - //if(it->second.tile_type == RegistryTileType::Tile) - { - tiles.push_back(get(it->second.tile_name, it->second.registry_idspace)); - } - it++; - } - - return tiles; - } - - /** - * @brief Increase the level of stack. - * - */ - void increment_registry_level() - { - _registry_level++; - } - - /** - * @brief Remove all the tiles declared at the current stack level and decrease the level of the stack. - * - */ - void decrement_registry_level() - { - assert(_registry_level >= 0); - - // Remove all variables in the local scope - std::map<RegistryTileName, RegistryTileTableEntry>::iterator it = _frags[_IdSpace].begin(); - - while (it != _frags[_IdSpace].end()) - { - if (it->second.registry_level == _registry_level) - { - it = _frags[_IdSpace].erase(it); - } - else - { - it++; - } - } - - std::map<RegistryTileName, RegistryTileTypeTableEntry>::iterator it_type = _frag_types[_IdSpace].begin(); - - while (it_type != _frag_types[_IdSpace].end()) - { - if (it_type->second.registry_level == _registry_level) - { - it_type = _frag_types[_IdSpace].erase(it_type); - } - else - { - it_type++; - } - } - - _registry_level--; - } - - /** - * @brief Get the level of the stack - * - */ - int32_t level() const - { - return _registry_level; - } - -private: - // This method ensures that the key is unique among different components - std::string generate_tile_name(const std::string &name) - { - assert(_IdSpace >= 0); - if (_registry_level == 0) - { - return "_G" + std::to_string(_IdSpace) + "_" + name; - } - else - { - return name; - } - } - - RegistryTileTable _frags{}; - RegistryTileTypeTable _frag_types{}; - RegistryLevel _registry_level{0}; - RegistryIdSpace _IdSpace{-1}; - int32_t _anonymous_frag_count{0}; // Counter used to create the anonymous tiles - GpuTargetLanguage _language{GpuTargetLanguage::Unknown}; // Gpu programming language -}; - -using TensorEntry = std::unique_ptr<IGpuTensorArgument>; - -/** - * @brief Data structure that contains the tensors consumed by the components. - * Since different components may use the same name as reference for a tensor, the registry adopts the IdSpace concept, an @p id to prevent name collisions - * when declaring tensors among different components. - * - */ -class GpuTensorArgumentRegistry -{ -public: - /** - * @brief Construct a new Gpu Tensor Registry object - * - */ - GpuTensorArgumentRegistry() - { - _language = GpuTargetLanguage::Unknown; - } - - /** - * @brief Construct a new Gpu Tensor Registry object - * - * @param[in] language Gpu programming language to use - */ - GpuTensorArgumentRegistry(GpuTargetLanguage language) - { - _language = language; - } - - /** - * @brief Default destructor. Destroy the Gpu Tensor Registry object - * - */ - ~GpuTensorArgumentRegistry() = default; - - /** - * @brief Set the working IdSpace for the tensor registry. IdSpace is used to prevent name collisions when declaring tensors. - * Therefore, the IdSpace should be set before declaring any tensors. - * - * @param[in] id The IdSpace id - */ - void set_IdSpace(int32_t id) - { - _IdSpace = id; - } - - /** - * @brief Get the current working IdSpace for the tensor registry. IdSpace is used to prevent name collisions when declaring tensors - * - * @return The IdSpace id - */ - int32_t IdSpace() const - { - return _IdSpace; - } - - /** - * @brief Gets all the IdSpace declarations defined in the tensor registry. - * - * @return all the IdSpace declarations defined in the tensor registry as std::vector<int32_t>. It returns an empty vector if there are no IdSpace declarations. - */ - std::vector<int32_t> IdSpace_declarations() const - { - std::vector<int32_t> x; - - auto it = _refs.begin(); - - while (it != _refs.end()) - { - x.push_back(it->first); - - it++; - } - - return x; - } - - /** - * @brief Declare a tensor with TensorInfo. The tensor will be stored in the IdSpace set with @p set_IdSpace() - * - * @note The reference name used for declaring the tensor should not be previously used in the IdSpace - * - * @param[in] name Reference name for the tensor. The reference name can be used to retrieve the tensor stored in the registry. - * @param[in] x Pair of tensor info and tensor id - * @param[in] return_by_value_when_possible True if we want the value stored in the tensor components - */ - void insert(const std::string &name, const TensorInfo &x, bool return_by_value_when_possible) - { - assert(_language == GpuTargetLanguage::OpenCL); - const int32_t key_IdSpace = _IdSpace; - const int32_t tensor_id = x.id; - const std::string key_var_name = name; - const std::string var_name = generate_tensor_name(name, tensor_id); - - // First, check whether the tensor has already a reference. If so, trigger an assert - assert(!has_tensor_argument(name)); - - // Check whether a tensor with that tensorID exists - auto result = _tensor_arguments.find(tensor_id); - if (result == _tensor_arguments.end()) - { - // It means that we haven't added a tensor with that tensor_id yet. Create a IGpuTensorArgument before creating the reference - std::unique_ptr<ClTensorArgument> arg = - std::make_unique<ClTensorArgument>(var_name, x, return_by_value_when_possible); - _tensor_arguments[tensor_id] = std::move(arg); - } - - _refs[key_IdSpace][key_var_name] = tensor_id; - } - - /** - * @brief Get the tensor from the registry. This method searches the tensor in the IdSpace set with @p set_IdSpace() - * - * @param[in] name The name of the tensor to retrieve - * - * @return IGpuTensor* The tensor - */ - IGpuTensorArgument *operator[](const std::string &name) - { - const int32_t key_IdSpace = _IdSpace; - const std::string key_var_name = name; - - IGpuTensorArgument *result = nullptr; - auto search_IdSpace = _refs.find(key_IdSpace); - if (search_IdSpace != _refs.end()) - { - auto search_tensor_id = _refs[key_IdSpace].find(key_var_name); - - if (search_tensor_id != _refs[key_IdSpace].end()) - { - const int32_t tensor_id = search_tensor_id->second; - auto search_tensor_argument = _tensor_arguments.find(tensor_id); - if (search_tensor_argument != _tensor_arguments.end()) - { - result = search_tensor_argument->second.get(); - } - assert(result != nullptr); - } - } - - return result; - } - - /** - * @brief Get all the tensors declared in the IdSpace provided by the user - * - * @return std::vector<IGpuTensorArgument*> A vector with all the declared tensors - */ - std::vector<IGpuTensorArgument *> tensor_argument_declarations() - { - std::vector<IGpuTensorArgument *> args; - - auto it = _tensor_arguments.begin(); - - while (it != _tensor_arguments.end()) - { - args.push_back(it->second.get()); - it++; - } - - return args; - } - - /** - * @brief Check whether the tensor argument in the IdSpace set with @p set_IdSpace() exists - * - * @param[in] name Name of the tensor argument to search for - * - * @return true if the tensor argument exists - * @return false if the tensor argument does not exist - */ - bool has_tensor_argument(const std::string &name) - { - const int32_t key_IdSpace = _IdSpace; - const std::string key_var_name = name; - - auto search_IdSpace = _refs.find(key_IdSpace); - - if (search_IdSpace != _refs.end()) - { - auto search_tensor_id = _refs[key_IdSpace].find(key_var_name); - - return search_tensor_id != _refs[key_IdSpace].end(); - } - else - { - return false; - } - } - - /** - * @brief Check whether the tensor argument is in the the IdSpace provided by the user - * - * @param[in] name Name of the tensor argument to search for - * @param[in] IdSpace The IdSpace id where to search the tensor argument - * - * @return true if the tile exists - * @return false if the tile does not exist - */ - bool has_tensor_argument(const std::string &name, int32_t IdSpace) - { - const int32_t key_IdSpace = IdSpace; - const std::string key_var_name = name; - - auto search_IdSpace = _refs.find(key_IdSpace); - - if (search_IdSpace != _refs.end()) - { - auto search_tensor_id = _refs[key_IdSpace].find(key_var_name); - - return search_tensor_id != _refs[key_IdSpace].end(); - } - else - { - return false; - } - } - -private: - // This method ensures that the key is unique among different components - std::string generate_tensor_name(const std::string &name, int32_t tensor_id) - { - assert(tensor_id >= 0); - - return name + std::to_string(tensor_id); - } - - std::map<int32_t, TensorEntry> _tensor_arguments{}; - std::map<int32_t, std::map<std::string, int32_t>> _refs{}; - int32_t _IdSpace{-1}; - GpuTargetLanguage _language{GpuTargetLanguage::Unknown}; // Gpu programming language -}; - -enum class OpType : int32_t -{ - Elementwise = 0x0000, - Relational = 0x1000, - Algebra = 0x2000 -}; - -inline std::string to_string(AssignmentOp op) -{ - switch (op) - { - case AssignmentOp::Decrement: - return "-="; - case AssignmentOp::Increment: - return "+="; - default: - assert(false); - return ""; - } -} - -inline std::string to_string(UnaryOp op) -{ - switch (op) - { - case UnaryOp::LogicalNot: - return "!"; - case UnaryOp::BitwiseNot: - return "~"; - case UnaryOp::Negate: - return "-"; - default: - assert(false); - return ""; - } -} - -inline std::string to_string(BinaryOp op) -{ - switch (op) - { - case BinaryOp::Add: - return "+"; - case BinaryOp::Sub: - return "-"; - case BinaryOp::Mul: - return "*"; - case BinaryOp::Div: - return "/"; - case BinaryOp::Mod: - return "%"; - case BinaryOp::Equal: - return "=="; - case BinaryOp::Less: - return "<"; - case BinaryOp::LessEqual: - return "<="; - case BinaryOp::Greater: - return ">"; - case BinaryOp::GreaterEqual: - return ">="; - case BinaryOp::LogicalAnd: - return "&&"; - case BinaryOp::LogicalOr: - return "||"; - case BinaryOp::BitwiseXOR: - return "^"; - default: - assert(false); - return ""; - } -} - -inline std::string binary_op_string(BinaryOp op) -{ - switch (op) - { - case BinaryOp::Add: - return "add"; - case BinaryOp::Sub: - return "sub"; - case BinaryOp::Mul: - return "mul"; - case BinaryOp::Div: - return "div"; - case BinaryOp::Mod: - return "mod"; - case BinaryOp::Equal: - return "eq"; - case BinaryOp::Less: - return "gt"; - case BinaryOp::LessEqual: - return "gteq"; - case BinaryOp::Greater: - return "lt"; - case BinaryOp::GreaterEqual: - return "lte"; - default: - assert(false); - return ""; - } -} - -enum class OperandType : int32_t -{ - Unknown = 0x00000000, - ScalarFp32 = 0x00001011, // Immediate scalar tile - ScalarFp16 = 0x00001012, // Immediate scalar tile - ScalarInt32 = 0x00001021, // Immediate scalar tile - ScalarInt16 = 0x00001022, // Immediate scalar tile - ScalarInt8 = 0x00001024, // Immediate scalar tile - ScalarUInt32 = 0x00001031, // Immediate scalar tile - ScalarUInt16 = 0x00001032, // Immediate scalar tile - ScalarUInt8 = 0x00001034, // Immediate scalar tile - ScalarBool = 0x00001041, // Immediate scalar tile - ScalarTile = 0x00001050, // Scalar from a tile - Tile = 0x00010000, // Tile - TensorStride1 = 0x00100001, // Tensor component - TensorStride2 = 0x00100002, // Tensor component - TensorStride3 = 0x00100003, // Tensor component - TensorStride4 = 0x00100004, // Tensor component - TensorDim0 = 0x00100010, // Tensor component - TensorDim1 = 0x00100020, // Tensor component - TensorDim2 = 0x00100030, // Tensor component - TensorDim3 = 0x00100040, // Tensor component - TensorDim4 = 0x00100050, // Tensor component - TensorC = 0x00100010, // Tensor component - TensorW = 0x00100020, // Tensor component - TensorH = 0x00100030, // Tensor component - TensorD = 0x00100040, // Tensor component - TensorN = 0x00100050, // Tensor component - TensorDim1xDim2 = 0x00100100, // Tensor component - TensorDim1xDim2xDim3 = 0x00100200, // Tensor component - TensorWxH = 0x00100300, // Tensor component - TensorWxHxD = 0x00100400, // Tensor component - TensorDataOffset = 0x00100500, // Tensor component -}; - -struct ScalarTileCoord -{ - ScalarTileCoord() - { - } - - ScalarTileCoord(int32_t x0, int32_t y0) : x(x0), y(y0) - { - } - - int32_t x{-1}; - int32_t y{-1}; -}; - -/** - * @brief Operand class. This object is used to pass the operands to the operations performed by the writer. - * Operand can be of three types: - * -# Scalar immediate: constant expression - * -# Tile: A tile - * -# Tensor component: A component (scalar) of a tensor - * - */ -class Operand -{ -public: - Operand(const std::string &val) - { - _str = val; - _type = OperandType::Tile; - } - - Operand(const std::string &val, const ScalarTileCoord &coord) - { - _str = val; - _type = OperandType::ScalarTile; - _coord = coord; - } - - Operand(const std::string &val, OperandType type) - { - _str = val; - _type = type; - } - - Operand(const Operand &t) - { - _str = t.value(); - _type = t.type(); - } - - Operand &operator=(const Operand &t) - { - _str = t.value(); - _type = t.type(); - _coord = t.scalar_tile_coordinate(); - return *this; - } - - std::string value() const - { - return _str; - } - - OperandType type() const - { - return _type; - } - - ScalarTileCoord scalar_tile_coordinate() const - { - return _coord; - } - -private: - std::string _str{}; - OperandType _type{OperandType::Unknown}; - ScalarTileCoord _coord{}; -}; - -using GpuSamplerTensorStorage = GpuTensorStorage; - -struct GpuSampler -{ - GpuSampler() = default; - - TensorSamplerFormat format{TensorSamplerFormat::Unknown}; - GpuSamplerTensorStorage storage{GpuSamplerTensorStorage::Unknown}; - TensorSamplerAddressModeX address_mode_x{TensorSamplerAddressModeX::Unknown}; - TensorSamplerAddressModeY address_mode_y{TensorSamplerAddressModeY::Unknown}; - TensorSamplerAddressModeZ address_mode_z{TensorSamplerAddressModeZ::Unknown}; -}; - -inline GpuSampler create_simple_sampler( - const TensorInfo *tensor_info_id, GpuSampler sampler, int32_t step_x, int32_t step_y, int32_t step_z) -{ - CKW_UNUSED(step_x, step_y, step_z); - - auto tensor = tensor_info_id->shape; - - GpuSampler dst_sampler; - dst_sampler.format = sampler.format; - dst_sampler.storage = GpuSamplerTensorStorage::BufferUint8Ptr; - dst_sampler.address_mode_x = sampler.address_mode_x; - dst_sampler.address_mode_y = sampler.address_mode_y; - dst_sampler.address_mode_z = sampler.address_mode_z; - - int32_t dim_x = 0; - int32_t dim_y = 0; - int32_t dim_z = 0; - - switch (sampler.format) - { - case TensorSamplerFormat::C_W_H: - dim_x = tensor[0]; - dim_y = tensor[1]; - dim_z = tensor[2]; - break; - case TensorSamplerFormat::C_WH_1: - dim_x = tensor[0]; - dim_y = tensor[1] * tensor[2]; - dim_z = 1; - break; - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - break; - } - - if (dim_x == 1) - { - assert(step_x == 1); - dst_sampler.address_mode_x = TensorSamplerAddressModeX::None; - } - - if (dim_y == 1) - { - assert(step_y == 1); - dst_sampler.address_mode_y = TensorSamplerAddressModeY::None; - } - - if (dim_z == 1) - { - assert(step_z == 1); - dst_sampler.address_mode_z = TensorSamplerAddressModeZ::None; - } - - return dst_sampler; -} - -class GpuOutputSampler -{ -public: - GpuOutputSampler() = default; - - /** - * @brief Method used to initialize the GpuOutputSampler. The GpuOutputSampler can be initialized only once - * by the root component. Once initialized, all simpler components will need to used this sampler - * or a broadcasted version of it - * - * @param[in] sampler GpuSampler - * @param[in] step_x Increment step in the X direction. Not necessarily it is the same of n0 of tile! - * @param[in] step_y Increment step in the Y direction. Not necessarily it is the same of m0 of tile! - * @param[in] step_z Increment step in the Z direction. Not necessarily it is the same of d0 of tile! - */ - void initialize(const TensorInfo *tensor_info_id, - GpuSamplerTensorStorage tensor_storage, - TensorSamplerFormat tensor_format, - int32_t step_x, - int32_t step_y, - int32_t step_z) - { - assert(_is_initialized == false); - - _step_x = step_x; - _step_y = step_y; - _step_z = step_z; - _tensor_info_id = tensor_info_id; - _sampler = create_sampler(tensor_storage, tensor_format); - _is_initialized = true; - }; - - GpuSampler sampler() const - { - return _sampler; - }; - - int32_t step_x() const - { - return _step_x; - }; - - int32_t step_y() const - { - return _step_y; - }; - - int32_t step_z() const - { - return _step_z; - }; - -private: - GpuSampler create_sampler(GpuSamplerTensorStorage tensor_storage, TensorSamplerFormat tensor_format) - { - // Output can only be in output mode - assert(tensor_storage != GpuSamplerTensorStorage::Image2dReadOnly); - assert(tensor_storage != GpuSamplerTensorStorage::Image3dReadOnly); - - auto tensor = _tensor_info_id->shape; - - GpuSampler sampler; - sampler.format = tensor_format; - sampler.storage = tensor_storage; - sampler.address_mode_x = TensorSamplerAddressModeX::None; - sampler.address_mode_y = TensorSamplerAddressModeY::None; - sampler.address_mode_z = TensorSamplerAddressModeZ::None; - - // In the case of texture, we do not need any special checks at the border - if (tensor_storage == GpuSamplerTensorStorage::BufferUint8Ptr) - { - int32_t dim_x = 0; - int32_t dim_y = 0; - int32_t dim_z = 0; - - switch (tensor_format) - { - case TensorSamplerFormat::C_W_H: - dim_x = tensor[0]; - dim_y = tensor[1]; - dim_z = tensor[2]; - break; - case TensorSamplerFormat::C_WH_1: - dim_x = tensor[0]; - dim_y = tensor[1] * tensor[2]; - dim_z = 1; - break; - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - break; - } - - if ((dim_x % _step_x) != 0 && dim_x != 1) - { - sampler.address_mode_x = TensorSamplerAddressModeX::OverlappingMin; - } - - if ((dim_y % _step_y) != 0 && dim_y != 1) - { - sampler.address_mode_y = TensorSamplerAddressModeY::ClampToMaxEdgeOnly; - } - - if ((dim_z % _step_z) != 0 && dim_z != 1) - { - sampler.address_mode_z = TensorSamplerAddressModeZ::ClampToMaxEdgeOnly; - } - } - - return sampler; - } - - GpuSampler _sampler{}; // GpuSampler - int32_t _step_x{1}; - int32_t _step_y{1}; - int32_t _step_z{1}; - const TensorInfo *_tensor_info_id{nullptr}; - bool _is_initialized{false}; -}; - -/** - * @brief Tensor operand class. This object is used to pass the operands as tensor to the operations performed by the writer. - */ -class TensorOperand -{ -public: - TensorOperand(const std::string &val, GpuSampler sampler) : _str(val), _sampler(sampler) - { - } - - TensorOperand &operator=(const TensorOperand &t) - { - _str = t.value(); - _sampler = t.sampler(); - return *this; - } - - std::string value() const - { - return _str; - } - - GpuSampler sampler() const - { - return _sampler; - } - -private: - std::string _str{}; - GpuSampler _sampler{}; -}; - -/** - * @brief Data structure that contains all the necessary information to write the Gpu kernel with the Gpu kernel Writer - * This data structure must be initialized before being passed to the Gpu Kernel Writer - * - */ -class GpuKernelWriterDataHolder -{ -public: - /** - * @brief Construct a new Gpu Kernel Data object. In this phase, we should also store - * the GPU target and target specific capabilities (extensions). For now, we just initialize the - * programming language - * - * @param[in] language Gpu programming language to use - */ - GpuKernelWriterDataHolder(GpuTargetLanguage language) - : tiles(language), arguments(language), code(""), _language(language) - { - } - - /** - * @brief Get the Gpu programming language used - * - * @return GpuTargetLanguage the Gpu programming language - */ - GpuTargetLanguage programming_language() const - { - return _language; - } - - /** - * @brief @ref GpuTileRegistry - * - */ - GpuTileRegistry tiles{}; - /** - * @brief @ref GpuTensorArgumentRegistry - * - */ - GpuTensorArgumentRegistry arguments{}; - /** - * @brief @ref GpuOutputSampler. - * - */ - GpuOutputSampler output_sampler{}; - /** - * @brief Source code - * - */ - std::string code{}; - - // GpuExtensionRegistry extensions{}; -private: - GpuTargetLanguage _language; -}; - -struct LWS -{ - int32_t x{1}; - int32_t y{1}; - int32_t z{1}; -}; - -/** - * @brief Utility class used to get the tile from the operand. If the operand is not a tile, @ref OperandUnpacker - * declare an anonymous tile in the tile registry. - */ -class OperandUnpacker -{ -public: - OperandUnpacker(GpuTileRegistry &tiles, GpuTensorArgumentRegistry &arguments) : _tiles(tiles), _arguments(arguments) - { - // Increase the level of the stack to allocate possible temporary tiles - _tiles.increment_registry_level(); - }; - - ~OperandUnpacker() - { - // Decrease the level of the stack to deallocate any temporary tiles - _tiles.decrement_registry_level(); - } - - IVectorTile *unpack(const Operand &src) - { - // Get the tile - if (src.type() == OperandType::Tile) - { - assert(_tiles.has_tile(src.value())); - return _tiles[src.value()]; - } - // Create an anonymous tile with a constant - else if (static_cast<int32_t>(src.type()) & 0x00001000) - { - if (src.type() == OperandType::ScalarTile) - { - ScalarTileCoord coord = src.scalar_tile_coordinate(); - assert(_tiles.has_tile(src.value())); - assert(coord.x >= 0); - assert(coord.y >= 0); - auto val = _tiles[src.value()]->scalar(coord.x, coord.y); - return _tiles.insert({{{val.str}}}, val.type.dt); - } - else - { - return _tiles.insert({{{src.value()}}}, to_tile_data_type(src.type())); - } - } - // Create an anonymous tile with the tensor component - else - { - assert(_arguments.has_tensor_argument(src.value())); - auto x = _arguments[src.value()]; - const std::string val = x->component(to_tensor_component(src.type())); - const DataType dt = x->component_data_type(); - return _tiles.insert({{{val}}}, dt); - } - } - -private: - DataType to_tile_data_type(OperandType x) - { - return static_cast<DataType>(static_cast<int32_t>(x) & 0x00ff); - } - - TensorComponentType to_tensor_component(OperandType x) - { - switch (x) - { - case OperandType::TensorDim0: - return TensorComponentType::Dim0; - case OperandType::TensorDim1: - return TensorComponentType::Dim1; - case OperandType::TensorDim2: - return TensorComponentType::Dim2; - case OperandType::TensorDim3: - return TensorComponentType::Dim3; - case OperandType::TensorDim4: - return TensorComponentType::Dim4; - case OperandType::TensorStride1: - return TensorComponentType::Stride1; - case OperandType::TensorStride2: - return TensorComponentType::Stride2; - case OperandType::TensorStride3: - return TensorComponentType::Stride3; - case OperandType::TensorStride4: - return TensorComponentType::Stride4; - case OperandType::TensorDim1xDim2: - return TensorComponentType::Dim1xDim2; - case OperandType::TensorDim1xDim2xDim3: - return TensorComponentType::Dim1xDim2xDim3; - case OperandType::TensorDataOffset: - return TensorComponentType::OffsetFirstElement; - default: - assert(false); - return TensorComponentType::Unknown; - } - } - - GpuTileRegistry &_tiles; - GpuTensorArgumentRegistry &_arguments; -}; - -/** - * @brief Utility class used to get the tensor argument from the operand. If the operand is not a tile, @ref OperandUnpacker - * declare an anonymous tile in the tile registry. - * Tensor dimension reduction aims for reducing the tensor data dimension while keeping data's tensor structure. - */ -class TensorOperandUnpacker -{ -public: - TensorOperandUnpacker(GpuTensorArgumentRegistry &arguments) : _arguments(arguments){}; - - IGpuTensorArgument *unpack(const TensorOperand &src) - { - assert(_arguments.has_tensor_argument(src.value())); - return _arguments[src.value()]; - } - -private: - GpuTensorArgumentRegistry &_arguments; -}; - -/** - * @brief The GpuKernel will be used in three occasions (stages): - * #- Compilation stage - * #- Tuning stage - * #- Dispatch stage - */ -struct GpuKernel -{ - // Compilation stage - std::string code{}; // Source code, required for the compilation stage - std::vector<GpuExtensions> list_extensions{}; // Extensions, required for the compilation stage - // Tuning stage - std::string config_id{}; // Unique id, required for the tuning stage - std::vector<LWS> list_lws{}; // LWS to test, required for the tuning stage - // Dispatch stage - GpuOutputSampler output_sampler{}; // GpuOutputSampler, required for the dispatch stage - std::vector<std::pair<int32_t, GpuTensorStorage>> - list_tensor_storages; // List of tensor storages, required for the dispatch stage - std::vector<std::pair<int32_t, TensorComponentType>> - list_tensor_components; // List of tensor components (width, stride,..), required for the dispatch stage) -}; - -// Generate all extension pragmas (hardcoded for now) -inline std::string generate_extensions() -{ - std::string ext = R"( -#if defined(cl_khr_fp16) -#pragma OPENCL EXTENSION cl_khr_fp16 : enable -#endif // defined(cl_khr_fp16) - -#if defined(cl_arm_integer_dot_product_int8) -#pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : enable -#endif // defined(cl_arm_integer_dot_product_int8) - -#if defined(cl_arm_integer_dot_product_accumulate_int8) -#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : enable -#endif // defined(cl_arm_integer_dot_product_accumulate_int8) - -#if defined(cl_arm_printf) -#pragma OPENCL EXTENSION cl_arm_printf : enable -#endif // defined(cl_arm_printf); -)"; - return ext; -} - -// This function should produce an object with the source -inline std::string generate_code(GpuKernelWriterDataHolder &in, const std::string &name) -{ - std::string code; - code += generate_extensions(); - code += "__kernel void "; - code += name; - code += "(\n"; - - auto IdSpaces = in.arguments.IdSpace_declarations(); - - std::vector<std::string> arg_str; - - auto tensor_args = in.arguments.tensor_argument_declarations(); - - for (auto &i : tensor_args) - { - // For each tensor used, get the storage and tensor components - auto storages = i->storage_declarations(); - auto components = i->component_declarations(); - - for (auto &y : storages) - { - std::string str; - str += i->storage_type_declaration(y); - str += " "; - str += i->storage(y); - arg_str.push_back(str); - } - - for (auto &y : components) - { - std::string str; - str += i->component_type_declaration(); - str += " "; - str += i->component(y); - arg_str.push_back(str); - } - } - - for (size_t i = 0; i < arg_str.size(); ++i) - { - code += arg_str[i]; - if (i + 1 < arg_str.size()) - { - code += ",\n"; - } - } - - code += ")\n"; - code += "{\n"; - code += in.code; - code += "}\n"; - - return code; -} - -/** - * @brief This class is responsible to map a N-Tensor to a 3d tensor. The mapper needs the GpuSampler to know - * how to reduce the dimensionality of a tensor - * - */ -class GpuTensor3dMapper -{ -public: - GpuTensor3dMapper(IGpuTensorArgument *tensor, GpuSampler sampler) : _sampler(sampler), _tensor(tensor){}; - - std::string tensor_component_x() const - { - const auto format = _sampler.format; - switch (format) - { - case TensorSamplerFormat::C_WH_1: - case TensorSamplerFormat::C_W_H: - return _tensor->component(TensorComponentType::Dim0); - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - return ""; - } - } - - std::string tensor_component_y() const - { - const auto format = _sampler.format; - switch (format) - { - case TensorSamplerFormat::C_WH_1: - return _tensor->component(TensorComponentType::Dim1xDim2); - case TensorSamplerFormat::C_W_H: - return _tensor->component(TensorComponentType::Dim1); - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - return ""; - } - } - - std::string tensor_component_z() const - { - const auto format = _sampler.format; - switch (format) - { - case TensorSamplerFormat::C_WH_1: - return "1"; - case TensorSamplerFormat::C_W_H: - return _tensor->component(TensorComponentType::Dim2); - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - return ""; - } - } - - std::string tensor_component_stride_y() const - { - const auto format = _sampler.format; - switch (format) - { - case TensorSamplerFormat::C_WH_1: - case TensorSamplerFormat::C_W_H: - return _tensor->component(TensorComponentType::Stride1); - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - return ""; - } - } - - std::string tensor_component_stride_z() const - { - const auto format = _sampler.format; - switch (format) - { - case TensorSamplerFormat::C_WH_1: - return "0"; - case TensorSamplerFormat::C_W_H: - return _tensor->component(TensorComponentType::Stride2); - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - return ""; - } - } - - std::string tensor_component_stride_batch() const - { - const auto format = _sampler.format; - switch (format) - { - case TensorSamplerFormat::C_WH_1: - case TensorSamplerFormat::C_W_H: - return _tensor->component(TensorComponentType::Stride3); - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - return ""; - } - } - - bool is_one_component_x() const - { - auto t = _tensor->format(); - const auto format = _sampler.format; - switch (format) - { - case TensorSamplerFormat::C_WH_1: - case TensorSamplerFormat::C_W_H: - return t.shape[0] == 1; - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - return ""; - } - } - - bool is_one_component_y() const - { - auto t = _tensor->format(); - const auto format = _sampler.format; - switch (format) - { - case TensorSamplerFormat::C_WH_1: - return (t.shape[1] * t.shape[2]) == 1; - case TensorSamplerFormat::C_W_H: - return t.shape[1] == 1; - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - return ""; - } - } - - bool is_one_component_z() const - { - auto t = _tensor->format(); - const auto format = _sampler.format; - switch (format) - { - case TensorSamplerFormat::C_WH_1: - return true; - case TensorSamplerFormat::C_W_H: - return t.shape[2] == 1; - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - return ""; - } - } - - bool is_one_component_batch() const - { - auto t = _tensor->format(); - const auto format = _sampler.format; - switch (format) - { - case TensorSamplerFormat::C_WH_1: - case TensorSamplerFormat::C_W_H: - return t.shape[3] == 1; - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - return ""; - } - } - - GpuSampler gpu_sampler() const - { - return _sampler; - } - - IGpuTensorArgument *tensor_argument() const - { - return _tensor; - } - -private: - GpuSampler _sampler; - IGpuTensorArgument *_tensor; -}; - -struct GpuKernelWriterAttribute -{ - bool return_tensor_component_by_value{false}; -}; - -enum class RoundingMode -{ - None, - Rte, - Rtz, - Rtp, - Rtn -}; - -// https://llvm.org/docs/tutorial/MyFirstLanguageFrontend/LangImpl05.html -class IGpuKernelWriter -{ -public: - virtual ~IGpuKernelWriter() = default; - - virtual void set_IdSpace(int32_t id) = 0; - - virtual void import_tile(const std::string &dst, const IVectorTile *src) = 0; - - virtual void declare_argument(const std::string &name, const TensorInfo &tensor) = 0; - - virtual void declare_tile(const std::string &name, const TileInfo &info) = 0; - - virtual void - declare_const_tile(const std::string &name, const std::vector<std::vector<std::string>> &in, DataType dt) = 0; - - virtual void write_text(const std::string &x) = 0; - - virtual void compound_statement_begin() = 0; - - virtual void compound_statement_end() = 0; - - // Operations - virtual void op_get_global_id(const Operand &dst_var, int32_t dim) = 0; - - virtual void - op_get_global_coord(const Operand &dst, const Operand &step, const TensorOperand &tensor, int32_t dim) = 0; - - virtual void op_get_global_batch(const Operand &dst, const TensorOperand &tensor) = 0; - - virtual void op_get_global_size(const Operand &dst_var, int32_t dim) = 0; - - virtual void op_unary_expression(const Operand &dst, UnaryOp op, const Operand &src) = 0; - - virtual void op_binary_expression(const Operand &dst, const Operand &lhs, BinaryOp op, const Operand &rhs) = 0; - - virtual void op_assign(const Operand &dst_name, const Operand &src_name) = 0; - - virtual void - op_unary_elementwise_function(const Operand &dst_name, UnaryFunction func, const Operand &src_name) = 0; - - virtual void op_binary_elementwise_function(const Operand &dst_name, - BinaryFunction func, - const Operand &first_name, - const Operand &second_name) = 0; - - virtual void op_ternary_elementwise_function(const Operand &dst_name, - TernaryFunction func, - const Operand &first_name, - const Operand &second_name, - const Operand &third_name) = 0; - - virtual void op_if_header(const Operand &lhs, BinaryOp op, const Operand &rhs) = 0; - - virtual void op_else_if_header(const Operand &lhs, BinaryOp op, const Operand &rhs) = 0; - - virtual void op_else_header() = 0; - - virtual void op_for_loop_header(const Operand &var_name, - BinaryOp cond_op, - const Operand &cond_value, - const Operand &update_var, - AssignmentOp update_op, - const Operand &update_value) = 0; - - virtual void op_load_indirect(const TensorOperand &tensor, - const Operand &dst, - const Operand &x, - const Operand &y_indirect, - const Operand &z, - const Operand &b = Operand("0", OperandType::ScalarInt32)) = 0; - - virtual void op_load_immediate(const TensorOperand &tensor, - const Operand &dst, - const Operand &x, - const Operand &y, - const Operand &z, - const Operand &b = Operand("0", OperandType::ScalarInt32), - const Operand &dilation_y = Operand("1", OperandType::ScalarInt32)) = 0; - - virtual void op_store_immediate(const TensorOperand &tensor, - const Operand &src, - const Operand &x, - const Operand &y, - const Operand &z, - const Operand &b = Operand("0", OperandType::ScalarInt32)) = 0; - - virtual void op_cast_expression(const Operand &dst, const Operand &src, ConvertPolicy policy) = 0; - - virtual void op_return() = 0; - - // Utils - // It is the process of converting - virtual void util_get_indirect_buffer(const Operand &dst, - const TensorOperand &tensor, - const Operand &x, - const Operand &y, - const Operand &x_off, - const Operand &y_off) = 0; -}; - -enum class GpuLoadStoreType -{ - Load = 1, - Store = 2 -}; - -class IGpuLoadStoreHelperWriter -{ -public: - IGpuLoadStoreHelperWriter(IGpuKernelWriter *x, GpuTensor3dMapper mapper, GpuLoadStoreType type) - : _writer(x), _mapper(mapper), _type(type) - { - } - - IGpuLoadStoreHelperWriter(const IGpuLoadStoreHelperWriter &) = default; - - IGpuLoadStoreHelperWriter &operator=(const IGpuLoadStoreHelperWriter &) = default; - - virtual ~IGpuLoadStoreHelperWriter() = default; - - virtual void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) = 0; - - virtual void write(const std::pair<int32_t, std::string> &y) = 0; - - virtual void finalize() = 0; - -protected: - IGpuKernelWriter *_writer; - GpuTensor3dMapper _mapper; - GpuLoadStoreType _type; -}; - -class ClLoadStoreBufferHelperWriter : public IGpuLoadStoreHelperWriter -{ -public: - ClLoadStoreBufferHelperWriter(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type) - : IGpuLoadStoreHelperWriter(x, mapper, type) - { - } - - ClLoadStoreBufferHelperWriter(const ClLoadStoreBufferHelperWriter &) = default; - - ClLoadStoreBufferHelperWriter &operator=(const ClLoadStoreBufferHelperWriter &) = default; - - static bool validate(IGpuKernelWriter *x, GpuTensor3dMapper mapper, GpuLoadStoreType type, IVectorTile *dst) - { - CKW_UNUSED(x, type, dst); - - if (mapper.gpu_sampler().storage != GpuSamplerTensorStorage::BufferUint8Ptr) - { - return false; - } - return true; - } - - void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) override - { - assert(validate(_writer, _mapper, _type, dst)); - - _dst = dst; - _ls_width_full = dst->format().w; - - _coord_x = x->scalar(0, 0).str; - _coord_z = z->scalar(0, 0).str; - _coord_b = b->scalar(0, 0).str; - _coord_orig_z = _coord_z; - - out_of_bound_initialize_x(_coord_x); - out_of_bound_initialize_z(_coord_z); - - /* - meaning of else: - - x: partial load/store - - y: no load/store operation - - z: no load/store operation - if(x) - { - if(z) - { - if(y) - { - // full load/store width - } - else - { - // no load/store - } - } - else - { - // no load/store - } - } - else - { - if(z) - { - if(y) - { - // partial load/store width - } - else - { - // no load/store - } - } - else - { - // no load/store - } - } - */ - } - - void write(const std::pair<int32_t, std::string> &y) override - { - int32_t idx_y = y.first; - std::string coord_y = y.second; - - // The only check required is on Y. - out_of_bound_initialize_y(coord_y); - - const std::string dst = _dst->vector(idx_y).str; - const std::string address = to_ls_buffer_address(_coord_x, coord_y, _coord_z, _coord_b); - const std::string ls_buf = to_ls_buffer(_type, _ls_width_full, dst, address); - - _writer->write_text(ls_buf); - _writer->write_text(";\n"); - - out_of_bound_finalize_y(dst); - - // The left over load/store will be written in the finalize stage - if (_ls_width_part.size() != 0) - { - int32_t w = 0; - for (auto &p : _ls_width_part) - { - const std::string dst0 = _dst->vector(w, p, idx_y).str; - const std::string coord_x = _coord_x + " + " + std::to_string(w); - const std::string address = to_ls_buffer_address(coord_x, coord_y, _coord_z, _coord_b); - const std::string ls_buf0 = to_ls_buffer(_type, p, dst0, address); - _leftovers_x.push_back(std::make_pair(std::make_pair(dst0, coord_y), ls_buf0)); - - w += p; - } - } - } - - void finalize() override - { - out_of_bound_finalize_z(); - out_of_bound_finalize_x(); - } - -private: - IVectorTile *_dst{nullptr}; - int32_t _ls_width_full{0}; - std::vector<int32_t> _ls_width_part{}; - std::vector<std::pair<std::pair<std::string, std::string>, std::string>> _leftovers_x{}; - std::string _coord_x{}; - std::string _coord_z{}; - std::string _coord_orig_z{}; - std::string _coord_b{}; - - void out_of_bound_initialize_x(std::string &coord) - { - if (_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin) - { - auto tensor_format = _mapper.tensor_argument()->format(); - auto shape = tensor_format.shape; - - _ls_width_part = decompose_leftover_ls_vector_width(shape[0] % _ls_width_full); - if (_ls_width_part.size() != 0) - { - _writer->write_text("if(" + coord + " > 0)\n"); - _writer->compound_statement_begin(); - } - } - }; - - void out_of_bound_finalize_x() - { - if (_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin) - { - if (_ls_width_part.size() != 0) - { - _writer->compound_statement_end(); - _writer->write_text("else\n"); - _writer->compound_statement_begin(); - - out_of_bound_initialize_z(_coord_orig_z); - for (auto &i : _leftovers_x) - { - out_of_bound_initialize_y(i.first.second); - _writer->write_text(i.second); - _writer->write_text(";\n"); - out_of_bound_finalize_y(i.first.first); - } - out_of_bound_finalize_z(); - _writer->compound_statement_end(); - } - } - }; - - void out_of_bound_initialize_y(std::string &coord) - { - std::string max = ""; - - const auto address_mode_y = _mapper.gpu_sampler().address_mode_y; - - switch (address_mode_y) - { - case TensorSamplerAddressModeY::Skip: - case TensorSamplerAddressModeY::ClampToBorder: - // NOTE: This line should not be moved outside of the switch statement. - // The reason for that is because when we query the component, the component is marked as used - // and added to the list of arguments of the kernel. Since, not in all cases this component is required, - // we should request the component only when used - max = _mapper.tensor_component_y(); - _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n"); - _writer->compound_statement_begin(); - break; - case TensorSamplerAddressModeY::SkipMinEdgeOnly: - case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly: - _writer->write_text("if(" + coord + " >= 0)\n"); - _writer->compound_statement_begin(); - break; - case TensorSamplerAddressModeY::SkipMaxEdgeOnly: - case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly: - max = _mapper.tensor_component_y(); - _writer->write_text("if(" + coord + " < " + max + ")\n"); - _writer->compound_statement_begin(); - break; - case TensorSamplerAddressModeY::ClampToNearest: - max = _mapper.tensor_component_y(); - coord = "clamp(" + coord + ", 0, " + max + " - 1)"; - break; - case TensorSamplerAddressModeY::ClampToMaxEdgeOnly: - max = _mapper.tensor_component_y(); - coord = "min(" + coord + ", " + max + " - 1)"; - break; - case TensorSamplerAddressModeY::ClampToMinEdgeOnly: - coord = "max(" + coord + ", 0)"; - break; - case TensorSamplerAddressModeY::None: - break; - default: - std::cout << "Unsupported address mode for write_out_of_bound_check_yz" << std::endl; - assert(false); - } - }; - - void out_of_bound_finalize_y(const std::string &dst) - { - const auto address_mode_y = _mapper.gpu_sampler().address_mode_y; - - switch (address_mode_y) - { - case TensorSamplerAddressModeY::ClampToBorder: - case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly: - case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly: - case TensorSamplerAddressModeY::Skip: - case TensorSamplerAddressModeY::SkipMaxEdgeOnly: - case TensorSamplerAddressModeY::SkipMinEdgeOnly: - _writer->compound_statement_end(); - break; - case TensorSamplerAddressModeY::None: - break; - - default: - assert(false); - } - - switch (address_mode_y) - { - case TensorSamplerAddressModeY::ClampToBorder: - case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly: - case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly: - _writer->write_text("else\n"); - _writer->compound_statement_begin(); - _writer->write_text(dst); - _writer->write_text(" = 0.0f;\n"); - _writer->compound_statement_end(); - break; - case TensorSamplerAddressModeY::None: - break; - - default: - assert(false); - } - }; - - void out_of_bound_initialize_z(std::string &coord) - { - std::string max = ""; - - const auto address_mode_z = _mapper.gpu_sampler().address_mode_z; - - switch (address_mode_z) - { - case TensorSamplerAddressModeZ::Skip: - max = _mapper.tensor_component_z(); - _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n"); - _writer->compound_statement_begin(); - break; - case TensorSamplerAddressModeZ::SkipMinEdgeOnly: - _writer->write_text("if(" + coord + " >= 0)\n"); - _writer->compound_statement_begin(); - break; - case TensorSamplerAddressModeZ::SkipMaxEdgeOnly: - max = _mapper.tensor_component_z(); - _writer->write_text("if(" + coord + " < " + max + ")\n"); - _writer->compound_statement_begin(); - break; - case TensorSamplerAddressModeZ::ClampToNearest: - max = _mapper.tensor_component_z(); - coord = "clamp(" + coord + ", 0, " + max + " - 1)"; - break; - case TensorSamplerAddressModeZ::ClampToMaxEdgeOnly: - max = _mapper.tensor_component_z(); - coord = "min(" + coord + ", " + max + " - 1)"; - break; - case TensorSamplerAddressModeZ::ClampToMinEdgeOnly: - coord = "max(" + coord + ", 0)"; - break; - case TensorSamplerAddressModeZ::None: - break; - default: - std::cout << "Unsupported address mode for write_out_of_bound_check_yz" << std::endl; - assert(false); - } - }; - - void out_of_bound_finalize_z() - { - const auto address_mode_z = _mapper.gpu_sampler().address_mode_z; - - switch (address_mode_z) - { - case TensorSamplerAddressModeZ::Skip: - case TensorSamplerAddressModeZ::SkipMinEdgeOnly: - case TensorSamplerAddressModeZ::SkipMaxEdgeOnly: - _writer->compound_statement_end(); - break; - case TensorSamplerAddressModeZ::None: - break; - - default: - assert(false); - } - }; - - std::vector<int32_t> decompose_leftover_ls_vector_width(int32_t ls_leftover_vector_width) const - { - std::vector<int32_t> x; - - switch (ls_leftover_vector_width) - { - case 0: - break; - case 1: - case 2: - case 3: - case 4: - case 8: - case 16: - x.push_back(ls_leftover_vector_width); - break; - case 5: - x.push_back(4); - x.push_back(1); - break; - case 6: - x.push_back(4); - x.push_back(2); - break; - case 7: - x.push_back(4); - x.push_back(3); - break; - case 9: - x.push_back(8); - x.push_back(1); - break; - case 10: - x.push_back(8); - x.push_back(2); - break; - case 11: - x.push_back(8); - x.push_back(3); - break; - case 12: - x.push_back(8); - x.push_back(4); - break; - case 13: - x.push_back(8); - x.push_back(4); - x.push_back(1); - break; - case 14: - x.push_back(8); - x.push_back(4); - x.push_back(2); - break; - case 15: - x.push_back(8); - x.push_back(4); - x.push_back(3); - break; - - default: - assert(false); - } - return x; - } - - std::string - to_ls_buffer(GpuLoadStoreType type, int32_t vector_width, const std::string &data, const std::string &address) - { - switch (type) - { - case GpuLoadStoreType::Load: - if (vector_width != 1) - { - return data + " = vload" + std::to_string(vector_width) + "(0, " + address + ")"; - } - else - { - return data + " = *(" + address + ")"; - } - break; - case GpuLoadStoreType::Store: - if (vector_width != 1) - { - return "vstore" + std::to_string(vector_width) + "(" + data + ", 0, " + address + ")"; - } - else - { - return "*(" + address + ") = " + data; - } - break; - default: - std::cout << "Unsupported GpuLoadStoreType" << std::endl; - assert(false); - return ""; - } - } - - std::string - to_ls_buffer_address(const std::string &x, const std::string &y, const std::string &z, const std::string &b) const - { - auto tensor_storage = static_cast<GpuTensorStorage>(_mapper.gpu_sampler().storage); - assert(tensor_storage == GpuTensorStorage::BufferUint8Ptr); - const std::string ptr_buf = _mapper.tensor_argument()->storage(tensor_storage); - const std::string dst_type = get_cl_data_type(_dst->format().dt, 1); - - std::string address; - address += "(__global "; - address += dst_type; - address += "*)("; - address += ptr_buf; - if (x != "0" && (_mapper.is_one_component_x() != true)) - { - address += " + ("; - address += x + ") * sizeof(" + dst_type + ")"; - } - if (y != "0") - { - const std::string stride_y = _mapper.tensor_component_stride_y(); - address += " + ("; - address += y + ")"; - address += " * "; - address += stride_y; - } - if (z != "0") - { - const std::string stride_z = _mapper.tensor_component_stride_z(); - address += " + ("; - address += z + ")"; - address += " * "; - address += stride_z; - } - if (b != "0" && (_mapper.is_one_component_batch() != true)) - { - const std::string stride_b = _mapper.tensor_component_stride_batch(); - address += " + ("; - address += b + ")"; - address += " * "; - address += stride_b; - } - address += ")"; - return address; - } -}; - -class ClLoadStoreImage2dHelperWriter : public IGpuLoadStoreHelperWriter -{ -public: - static bool validate(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type, IVectorTile *dst) - { - CKW_UNUSED(x); - - if (dst->format().w != 4) - { - return false; - } - if (mapper.gpu_sampler().address_mode_x != TensorSamplerAddressModeX::None) - { - return false; - } - if (mapper.gpu_sampler().address_mode_z != TensorSamplerAddressModeZ::None) - { - return false; - } - if (mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dReadOnly && type == GpuLoadStoreType::Load) - { - return false; - } - if (mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dWriteOnly && - type == GpuLoadStoreType::Store) - { - return false; - } - if ((dst->format().dt != DataType::Fp32) && (dst->format().dt != DataType::Fp16)) - { - return false; - } - return true; - /* - - x: Only GpuSamplerAddressModeX::None is supported and vector length = 4 - - z: Only GpuSamplerAddressModeZ::None is supported - */ - } - - ClLoadStoreImage2dHelperWriter(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type) - : IGpuLoadStoreHelperWriter(x, mapper, type) - { - } - - ClLoadStoreImage2dHelperWriter(const ClLoadStoreImage2dHelperWriter &) = default; - - ClLoadStoreImage2dHelperWriter &operator=(const ClLoadStoreImage2dHelperWriter &) = default; - - void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) override - { - assert(validate(_writer, _mapper, _type, dst)); - - _dst = dst; - _ls_width_full = dst->format().w; - _coord_x = x->scalar(0, 0).str; - _coord_z = z->scalar(0, 0).str; - _coord_b = b->scalar(0, 0).str; - - /* - if(y) - { - // full load/store width - } - else - { - // no load/store - } - */ - } - - void write(const std::pair<int32_t, std::string> &y) override - { - int32_t idx_y = y.first; - std::string coord_y = y.second; - - // The only check required is on Y. - out_of_bound_initialize_y(coord_y); - - const std::string dst = _dst->vector(idx_y).str; - const std::string sampler = to_ls_image2d_sampler(); - const std::string coord = to_ls_image2d_coord(_coord_x, coord_y, _coord_z, _coord_b); - const std::string ls_buf = to_ls_image2d(_type, _ls_width_full, dst, sampler, coord); - - _writer->write_text(ls_buf); - _writer->write_text(";\n"); - - out_of_bound_finalize_y(dst); - } - - void finalize() override - { - } - -private: - IVectorTile *_dst{nullptr}; - int32_t _ls_width_full{0}; - std::string _coord_x{}; - std::string _coord_z{}; - std::string _coord_b{}; - - void out_of_bound_initialize_y(std::string &coord) - { - std::string max = ""; - - const auto address_mode_y = _mapper.gpu_sampler().address_mode_y; - - switch (address_mode_y) - { - case TensorSamplerAddressModeY::Skip: - max = _mapper.tensor_component_y(); - _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n"); - _writer->compound_statement_begin(); - break; - case TensorSamplerAddressModeY::SkipMinEdgeOnly: - _writer->write_text("if(" + coord + " >= 0)\n"); - _writer->compound_statement_begin(); - break; - case TensorSamplerAddressModeY::SkipMaxEdgeOnly: - max = _mapper.tensor_component_y(); - _writer->write_text("if(" + coord + " < " + max + ")\n"); - _writer->compound_statement_begin(); - break; - case TensorSamplerAddressModeY::ClampToBorder: - case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly: - case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly: - case TensorSamplerAddressModeY::ClampToNearest: - case TensorSamplerAddressModeY::ClampToMaxEdgeOnly: - case TensorSamplerAddressModeY::ClampToMinEdgeOnly: - case TensorSamplerAddressModeY::None: - break; - default: - std::cout << "Unsupported address mode for write_out_of_bound_check_y" << std::endl; - assert(false); - } - }; - - void out_of_bound_finalize_y(const std::string &dst) - { - CKW_UNUSED(dst); - - const auto address_mode_y = _mapper.gpu_sampler().address_mode_y; - - switch (address_mode_y) - { - case TensorSamplerAddressModeY::Skip: - case TensorSamplerAddressModeY::SkipMinEdgeOnly: - case TensorSamplerAddressModeY::SkipMaxEdgeOnly: - _writer->compound_statement_end(); - break; - - default: - assert(false); - } - }; - - std::string to_ls_image2d(GpuLoadStoreType type, - int32_t vector_width, - const std::string &data, - const std::string &sampler, - const std::string &coord) - { - CKW_UNUSED(vector_width); - - auto tensor_storage = static_cast<GpuTensorStorage>(_mapper.gpu_sampler().storage); - const std::string image2d_obj = _mapper.tensor_argument()->storage(tensor_storage); - const std::string post_fix = _dst->format().dt == DataType::Fp32 ? "f" : "h"; - - switch (type) - { - case GpuLoadStoreType::Load: - return data + " = read_image" + post_fix + "(" + image2d_obj + ", " + sampler + ", " + coord + ")"; - break; - case GpuLoadStoreType::Store: - return "write_image" + post_fix + "(" + image2d_obj + ", " + coord + ", " + data + ")"; - default: - assert(false); - std::cout << "Unsupported GpuLoadStoreType" << std::endl; - assert(false); - return ""; - } - } - - std::string to_ls_image2d_sampler() const - { - const auto address_mode_y = _mapper.gpu_sampler().address_mode_y; - - switch (address_mode_y) - { - case TensorSamplerAddressModeY::None: - return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST"; - case TensorSamplerAddressModeY::Skip: - case TensorSamplerAddressModeY::SkipMinEdgeOnly: - case TensorSamplerAddressModeY::SkipMaxEdgeOnly: - case TensorSamplerAddressModeY::ClampToBorder: - case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly: - case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly: - return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST"; - case TensorSamplerAddressModeY::ClampToNearest: - case TensorSamplerAddressModeY::ClampToMaxEdgeOnly: - case TensorSamplerAddressModeY::ClampToMinEdgeOnly: - return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST"; - default: - std::cout << "Unsupported address_mode_coord" << std::endl; - assert(false); - return ""; - } - } - - std::string - to_ls_image2d_coord(const std::string &x, const std::string &y, const std::string &z, const std::string &b) const - { - std::string coord_x = "(" + x + ") >> 2"; - std::string coord_y = "("; - - if (y != "0") - { - coord_y += y; - } - if (z != "0" && (_mapper.is_one_component_z() != true)) - { - const std::string dim = _mapper.tensor_component_y(); - coord_y += " + ("; - coord_y += z + ")"; - coord_y += " * "; - coord_y += dim; - } - if (b != "0" && (_mapper.is_one_component_batch() != true)) - { - const std::string dim0 = _mapper.tensor_component_y(); - const std::string dim1 = _mapper.tensor_component_z(); - coord_y += " + ("; - coord_y += b + ")"; - coord_y += " * "; - coord_y += dim0; - coord_y += " * "; - coord_y += dim1; - } - coord_y += ")"; - return "(int2)(" + coord_x + ", " + coord_y + ")"; - } -}; - -/** IGpuLoadStoreHelperWriter factory class */ -class ClLoadStoreHelperWriterFactory final -{ -public: - /** Static method to call the IGpuLoadStoreHelperWriter class accordingly with the tensor storage set in the mapper - * - * - * @return IGpuLoadStoreHelperWriter - */ - static std::unique_ptr<IGpuLoadStoreHelperWriter> - create(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type) - { - const auto tensor_storage = mapper.gpu_sampler().storage; - switch (tensor_storage) - { - case GpuSamplerTensorStorage::BufferUint8Ptr: - return std::make_unique<ClLoadStoreBufferHelperWriter>(x, mapper, type); - case GpuSamplerTensorStorage::Image2dReadOnly: - case GpuSamplerTensorStorage::Image2dWriteOnly: - return std::make_unique<ClLoadStoreImage2dHelperWriter>(x, mapper, type); - default: - std::cout << "Unsupported Gpu tensor storage" << std::endl; - assert(false); - return nullptr; - } - } -}; - -// This utility method needs to go in utils.h -inline bool is_tile_scalar(const IVectorTile *x) -{ - return x->format().w == 1 && x->format().h == 1; -} - -class ClKernelWriter : public IGpuKernelWriter -{ -public: - ClKernelWriter(GpuKernelWriterAttribute *attr, GpuKernelWriterDataHolder *x) - { - _data = x; - _attr = attr; - } - - ClKernelWriter(const ClKernelWriter &) = default; - - ClKernelWriter &operator=(const ClKernelWriter &) = default; - - // A IdSpaced ID is a term used to describe a fragment that is registered in ICode to ensure - // there are no conflicts or ambiguity in the code - void set_IdSpace(int32_t id) override - { - _data->tiles.set_IdSpace(id); - _data->arguments.set_IdSpace(id); - } - - void import_tile(const std::string &dst_name, const IVectorTile *src) override - { - _data->tiles.insert(dst_name, src); - } - - void declare_argument(const std::string &name, const TensorInfo &tensor) override - { - assert(_data->arguments[name] == nullptr); - _data->arguments.insert(name, tensor, _attr->return_tensor_component_by_value); - } - - void declare_tile(const std::string &name, const TileInfo &format) override - { - assert(_data->tiles[name] == nullptr); - _data->tiles.insert(name, format); - - IVectorTile *x = _data->tiles[name]; - - for (auto &t : x->underlying_source_variables()) - { - _data->code += t.type.str + " " + t.str + ";\n"; - } - } - - void - declare_const_tile(const std::string &name, const std::vector<std::vector<std::string>> &in, DataType dt) override - { - assert(_data->tiles[name] == nullptr); - _data->tiles.insert(name, in, dt); - // Note: A constant does not need to be declared in the code - } - - void write_text(const std::string &x) override - { - _data->code += x; - } - - void compound_statement_begin() override - { - _data->tiles.increment_registry_level(); - _data->code += "{\n"; - } - - void compound_statement_end() override - { - _data->tiles.decrement_registry_level(); - _data->code += "}\n"; - } - - void op_get_global_id(const Operand &dst_var, int32_t dim) override - { - assert(dst_var.type() == OperandType::Tile); - assert(_data->tiles.has_tile(dst_var.value())); - assert(_data->tiles[dst_var.value()]->format().w == 1 && - _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable - - auto var = _data->tiles[dst_var.value()]; - - _data->code += var->scalar(0, 0).str; - _data->code += " = get_global_id("; - _data->code += std::to_string(dim); - _data->code += ");\n"; - }; - - void op_get_global_coord(const Operand &o_dst, - const Operand &o_step, - const TensorOperand &o_tensor, - int32_t dim) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - auto dst = operands.unpack(o_dst); - auto step = operands.unpack(o_step); - - // Validation: Check that x, y and z are scalar - - TensorOperandUnpacker tensor_operands(_data->arguments); - auto tensor = tensor_operands.unpack(o_tensor); - auto gpu_sampler = o_tensor.sampler(); - - GpuTensor3dMapper mapper(tensor, gpu_sampler); - - switch (dim) - { - case 0: - if (mapper.is_one_component_x()) - { - _data->code += dst->scalar(0, 0).str; - _data->code += " = 0;\n"; - } - else - { - if (mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin) - { - // Validation: Check: fixed tensor shape - // TO BE CHANGED - _data->code += dst->scalar(0, 0).str; - _data->code += " = get_global_id(0) * "; - _data->code += step->scalar(0, 0).str; - _data->code += ";\n"; - } - else - { - _data->code += dst->scalar(0, 0).str; - _data->code += " = get_global_id(0) * "; - _data->code += step->scalar(0, 0).str; - _data->code += ";\n"; - } - } - break; - case 1: - if (mapper.is_one_component_y()) - { - _data->code += dst->scalar(0, 0).str; - _data->code += " = 0;\n"; - } - else - { - if (mapper.gpu_sampler().address_mode_y == TensorSamplerAddressModeY::OverlappingMin) - { - } - else - { - _data->code += dst->scalar(0, 0).str; - _data->code += " = get_global_id(1) * "; - _data->code += step->scalar(0, 0).str; - _data->code += ";\n"; - } - } - break; - case 2: - if (mapper.is_one_component_z()) - { - _data->code += dst->scalar(0, 0).str; - _data->code += " = 0;\n"; - } - else - { - _data->code += dst->scalar(0, 0).str; - _data->code += " = get_global_id(2) * "; - _data->code += step->scalar(0, 0).str; - _data->code += ";\n"; - } - break; - default: - break; - } - }; - - void op_get_global_batch(const Operand &o_dst, const TensorOperand &o_tensor) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - const IVectorTile *dst = operands.unpack(o_dst); - - TensorOperandUnpacker tensor_operands(_data->arguments); - IGpuTensorArgument *tensor = tensor_operands.unpack(o_tensor); - auto gpu_sampler = o_tensor.sampler(); - - GpuTensor3dMapper mapper(tensor, gpu_sampler); - - if (mapper.is_one_component_batch()) - { - _data->code += dst->scalar(0, 0).str; - _data->code += " = 0;\n"; - } - else - { - std::cout << "Unsupported batched computation" << std::endl; - assert(false); - } - }; - - void op_get_global_size(const Operand &dst_var, int32_t dim) override - { - assert(dst_var.type() == OperandType::Tile); - assert(_data->tiles.has_tile(dst_var.value())); - assert(_data->tiles[dst_var.value()]->format().w == 1 && - _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable - - auto var = _data->tiles[dst_var.value()]; - - _data->code += var->scalar(0, 0).str; - _data->code += " = get_global_size("; - _data->code += std::to_string(dim); - _data->code += ");\n"; - } - - void op_unary_expression(const Operand &dst_name, UnaryOp op, const Operand &src_name) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - const IVectorTile *src = operands.unpack(src_name); - const IVectorTile *dst = operands.unpack(dst_name); - - const int32_t dst_w = dst->format().w; - const int32_t dst_h = dst->format().h; - const int32_t src_w = src->format().w; - const std::string dt = dst->underlying_source_variables()[0].type.str; - - const bool broadcast_src_x = dst_w != 1 && src_w == 1; - - const std::string src_prefix = broadcast_src_x ? "(" + dt + ")" : ""; - - // Broadcasting on Y is automatic - for (int32_t y = 0; y < dst_h; ++y) - { - _data->code += dst->vector(y).str; - _data->code += " = "; - _data->code += to_string(op); - _data->code += src_prefix + src->vector(y).str; - _data->code += ";\n"; - } - } - - void op_binary_expression(const Operand &dst_name, - const Operand &lhs_name, - BinaryOp op, - const Operand &rhs_name) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - const IVectorTile *lhs = operands.unpack(lhs_name); - const IVectorTile *rhs = operands.unpack(rhs_name); - const IVectorTile *dst = operands.unpack(dst_name); - - const int32_t dst_w = dst->format().w; - const int32_t dst_h = dst->format().h; - assert(lhs != nullptr); - const int32_t lhs_w = lhs->format().w; - const int32_t rhs_w = rhs->format().w; - - if (op == BinaryOp::MatMul_Nt_T) - { - assert((dst->format().dt == DataType::Fp32) || (dst->format().dt == DataType::Fp16)); - for (int32_t y = 0; y < dst_h; ++y) - { - for (int32_t x = 0; x < dst_w; ++x) - { - for (int32_t k = 0; k < lhs_w; ++k) - { - _data->code += dst->scalar(x, y).str; - _data->code += " = fma("; - _data->code += lhs->scalar(k, y).str; - _data->code += ", "; - _data->code += rhs->scalar(k, x).str; - _data->code += ", "; - _data->code += dst->scalar(x, y).str; - _data->code += ");\n"; - } - } - } - - return; - } - - const bool broadcast_lhs_x = dst_w != 1 && lhs_w == 1; - const bool broadcast_rhs_x = dst_w != 1 && rhs_w == 1; - - const std::string lhs_prefix = - broadcast_lhs_x ? "(" + dst->underlying_source_variables()[0].type.str + ")" : ""; - const std::string rhs_prefix = - broadcast_rhs_x ? "(" + dst->underlying_source_variables()[0].type.str + ")" : ""; - const std::string op_str = to_string(op); - - // Broadcasting on Y is automatic - for (int32_t y = 0; y < dst_h; ++y) - { - _data->code += dst->vector(y).str; - _data->code += " = "; - _data->code += lhs_prefix + lhs->vector(y).str; - _data->code += " "; - _data->code += op_str; - _data->code += " "; - _data->code += rhs_prefix + rhs->vector(y).str; - _data->code += ";\n"; - } - }; - - void op_cast_expression(const Operand &o_dst, const Operand &o_src, ConvertPolicy policy) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - const IVectorTile *src = operands.unpack(o_src); - const IVectorTile *dst = operands.unpack(o_dst); - // const int32_t dst_w = dst->format().w; - const int32_t dst_h = dst->format().h; - const std::string dt = dst->underlying_source_variables()[0].type.str; - const bool is_float = (dst->format().dt == DataType::Fp32) || (dst->format().dt == DataType::Fp16); - const std::string sat = ((policy == ConvertPolicy::Saturate && !is_float) ? "_sat" : ""); - - // Broadcasting on Y is automatic - for (int32_t y = 0; y < dst_h; ++y) - { - _data->code += dst->vector(y).str; - _data->code += " = convert_" + dt + sat + "("; - _data->code += src->vector(y).str; - _data->code += ");\n"; - } - }; - - void op_assign(const Operand &dst_name, const Operand &src_name) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - const IVectorTile *src = operands.unpack(src_name); - const IVectorTile *dst = operands.unpack(dst_name); - - const int32_t dst_w = dst->format().w; - const int32_t dst_h = dst->format().h; - const int32_t src_w = src->format().w; - const std::string dt = dst->underlying_source_variables()[0].type.str; - - const bool broadcast_src_x = dst_w != 1 && src_w == 1; - - const std::string src_prefix = broadcast_src_x ? "(" + dt + ")" : ""; - - // Broadcasting on Y is automatic - for (int32_t y = 0; y < dst_h; ++y) - { - _data->code += dst->vector(y).str; - _data->code += " = "; - _data->code += src_prefix + src->vector(y).str; - _data->code += ";\n"; - } - } - - void op_unary_elementwise_function(const Operand &dst_name, UnaryFunction func, const Operand &src_name) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - const IVectorTile *src = operands.unpack(src_name); - const IVectorTile *dst = operands.unpack(dst_name); - - const int32_t dst_h = dst->format().h; - const std::string dt = dst->underlying_source_variables()[0].type.str; - - // Always perform an explicit cast. This automatically covers at least the 2 scenarios: - // 1. Widen a scalar into a vector type. This enables scalar-vector broadcasting - // 2. Ensure non-ambiguity over function overloads. - // E.g. a constant tile may be accidentally initialized with a double literal. By casting it to single float, - // it avoids ambiguous function calls - const std::string src_prefix = "(" + dt + ")"; - - // Broadcasting on Y is automatic - for (int32_t y = 0; y < dst_h; ++y) - { - _data->code += dst->vector(y).str; - _data->code += " = "; - - switch (func) - { - case UnaryFunction::Exp: - _data->code += "exp("; - break; - case UnaryFunction::Tanh: - _data->code += "tanh("; - break; - case UnaryFunction::Sqrt: - _data->code += "sqrt("; - break; - case UnaryFunction::Erf: - _data->code += "erf("; - break; - case UnaryFunction::Fabs: - _data->code += "fabs("; - break; - case UnaryFunction::Log: - _data->code += "log("; - break; - case UnaryFunction::SizeOf: - _data->code += "sizeof("; - break; - case UnaryFunction::Round: - _data->code += "round("; - break; - case UnaryFunction::Floor: - _data->code += "floor("; - break; - default: - CKW_ASSERT_MSG(false, "Unexpected UnaryFunction used."); - } - - _data->code += src_prefix + src->vector(y).str; - _data->code += ");\n"; - } - } - - void op_binary_elementwise_function(const Operand &dst_name, - BinaryFunction func, - const Operand &first_name, - const Operand &second_name) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - const IVectorTile *first = operands.unpack(first_name); - const IVectorTile *second = operands.unpack(second_name); - const IVectorTile *dst = operands.unpack(dst_name); - - const int32_t dst_h = dst->format().h; - const auto datatype = dst->underlying_source_variables()[0].type; - const std::string datatype_str = datatype.str; - - // Always perform an explicit cast. See similar comments in op_unary_elementwise_function - const std::string first_prefix = "(" + datatype_str + ")"; - const std::string second_prefix = "(" + datatype_str + ")"; - - const bool is_float = (datatype.dt == DataType::Fp32 || datatype.dt == DataType::Fp16); - - // Broadcasting on Y is automatic - for (int32_t y = 0; y < dst_h; ++y) - { - _data->code += dst->vector(y).str; - _data->code += " = "; - - switch (func) - { - case BinaryFunction::Min: - _data->code += is_float ? "fmin(" : "min("; - break; - case BinaryFunction::Max: - _data->code += is_float ? "fmax(" : "max("; - break; - default: - CKW_ASSERT_MSG(false, "Unexpected BinaryFunction used."); - } - - _data->code += first_prefix + first->vector(y).str; - _data->code += ", "; - _data->code += second_prefix + second->vector(y).str; - _data->code += ");\n"; - } - } - - void op_ternary_elementwise_function(const Operand &dst_name, - TernaryFunction func, - const Operand &first_name, - const Operand &second_name, - const Operand &third_name) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - const IVectorTile *first = operands.unpack(first_name); - const IVectorTile *second = operands.unpack(second_name); - const IVectorTile *third = operands.unpack(third_name); - const IVectorTile *dst = operands.unpack(dst_name); - - const int32_t dst_h = dst->format().h; - const std::string dt = dst->underlying_source_variables()[0].type.str; - - // Always perform an explicit cast. See similar comments in op_unary_elementwise_function - const std::string first_prefix = "(" + dt + ")"; - const std::string second_prefix = "(" + dt + ")"; - const std::string third_prefix = "(" + dt + ")"; - - // Broadcasting on Y is automatic - for (int32_t y = 0; y < dst_h; ++y) - { - _data->code += dst->vector(y).str; - _data->code += " = "; - - switch (func) - { - case TernaryFunction::Select: - _data->code += "select("; - break; - case TernaryFunction::Clamp: - _data->code += "clamp("; - break; - default: - CKW_ASSERT_MSG(false, "Unexpected TernaryFunction used."); - } - - _data->code += first_prefix + first->vector(y).str; - _data->code += ", "; - _data->code += second_prefix + second->vector(y).str; - _data->code += ", "; - _data->code += third_prefix + third->vector(y).str; - _data->code += ");\n"; - } - } - - void op_if_header(const Operand &o_lhs, BinaryOp op, const Operand &o_rhs) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - const IVectorTile *lhs = operands.unpack(o_lhs); - const IVectorTile *rhs = operands.unpack(o_rhs); - - assert(is_tile_scalar(lhs)); - assert(is_tile_scalar(rhs)); - - _data->code += "if("; - _data->code += lhs->scalar(0, 0).str; - _data->code += " "; - _data->code += to_string(op); - _data->code += " "; - _data->code += rhs->scalar(0, 0).str; - _data->code += ")\n"; - } - - void op_else_if_header(const Operand &o_lhs, BinaryOp op, const Operand &o_rhs) override - { - _data->code += "else "; - op_if_header(o_lhs, op, o_rhs); - } - - void op_else_header() override - { - _data->code += "else\n"; - } - - void op_for_loop_header(const Operand &var_name, - BinaryOp cond_op, - const Operand &cond_value_name, - const Operand &update_var_name, - AssignmentOp update_op, - const Operand &update_value_name) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - const IVectorTile *var = operands.unpack(var_name); - const IVectorTile *cond_value = operands.unpack(cond_value_name); - const IVectorTile *update_var = operands.unpack(update_var_name); - const IVectorTile *update_value = operands.unpack(update_value_name); - - const int32_t dst_w = var->format().w; - const int32_t dst_h = var->format().h; - - // It must be a scalar variable - CKW_UNUSED(dst_w, dst_h); - assert(dst_w == 1); - assert(dst_h == 1); - - _data->code += "for(; "; - _data->code += var->scalar(0, 0).str; - _data->code += " "; - _data->code += to_string(cond_op); - _data->code += " " + cond_value->scalar(0, 0).str + "; "; - _data->code += update_var->scalar(0, 0).str; - _data->code += " "; - _data->code += to_string(update_op); - _data->code += " " + update_value->scalar(0, 0).str + ")"; - _data->code += "\n"; - } - - void op_load_immediate(const TensorOperand &o_tensor, - const Operand &o_dst, - const Operand &o_x, - const Operand &o_y, - const Operand &o_z, - const Operand &o_batch_idx, - const Operand &dilation_y) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - - // Not const as it requires changes to 'load_writer'. - IVectorTile *dst = operands.unpack(o_dst); - IVectorTile *x = operands.unpack(o_x); - IVectorTile *y = operands.unpack(o_y); - IVectorTile *z = operands.unpack(o_z); - IVectorTile *dil_y = operands.unpack(dilation_y); - IVectorTile *b = operands.unpack(o_batch_idx); - - TensorOperandUnpacker tensor_operands(_data->arguments); - IGpuTensorArgument *tensor = tensor_operands.unpack(o_tensor); - auto gpu_sampler = o_tensor.sampler(); - - GpuTensor3dMapper mapper(tensor, gpu_sampler); - - auto load_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Load); - - // Initialize the constant part - load_writer->initialize(dst, x, z, b); - - for (int i = 0; i < dst->format().h; ++i) - { - std::string coord_y = y->scalar(0, 0).str + " + " + std::to_string(i); - if (dil_y->scalar(0, 0).str != "1") - { - coord_y += " * " + dil_y->scalar(0, 0).str; - } - load_writer->write(std::make_pair(i, coord_y)); - } - - load_writer->finalize(); - } - - void op_load_indirect(const TensorOperand &o_tensor, - const Operand &o_dst, - const Operand &o_x, - const Operand &o_indirect_h, - const Operand &o_z, - const Operand &o_batch_idx) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - - // Not const as it requires changes to 'load_writer'. - IVectorTile *dst = operands.unpack(o_dst); - IVectorTile *x = operands.unpack(o_x); - IVectorTile *y_ind = operands.unpack(o_indirect_h); - IVectorTile *z = operands.unpack(o_z); - IVectorTile *b = operands.unpack(o_batch_idx); - - TensorOperandUnpacker tensor_operands(_data->arguments); - IGpuTensorArgument *tensor = tensor_operands.unpack(o_tensor); - auto gpu_sampler = o_tensor.sampler(); - - GpuTensor3dMapper mapper(tensor, gpu_sampler); - - auto load_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Load); - - // Initialize the constant part - load_writer->initialize(dst, x, z, b); - - for (int i = 0; i < dst->format().h; ++i) - { - load_writer->write(std::make_pair(i, y_ind->scalar(0, i).str)); - } - - load_writer->finalize(); - } - - void op_store_immediate(const TensorOperand &tensor_name, - const Operand &src_name, - const Operand &x_name, - const Operand &y_name, - const Operand &z_name, - const Operand &batch_index_name) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - - // Not const as it requires changes to 'load_writer'. - IVectorTile *src = operands.unpack(src_name); - IVectorTile *x = operands.unpack(x_name); - IVectorTile *y = operands.unpack(y_name); - IVectorTile *z = operands.unpack(z_name); - IVectorTile *b = operands.unpack(batch_index_name); - - TensorOperandUnpacker tensor_operands(_data->arguments); - IGpuTensorArgument *tensor = tensor_operands.unpack(tensor_name); - auto gpu_sampler = tensor_name.sampler(); - - GpuTensor3dMapper mapper(tensor, gpu_sampler); - - auto store_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Store); - - // Initialize the constant part - store_writer->initialize(src, x, z, b); - - int32_t tile_h = src->format().h; - - for (int m0 = tile_h - 1; m0 >= 0; m0--) - { - store_writer->write(std::make_pair(m0, y->scalar(0, 0).str + " + " + std::to_string(m0))); - } - - store_writer->finalize(); - } - - void op_return() override - { - _data->code += "return;\n"; - } - - void util_get_indirect_buffer(const Operand &o_dst, - const TensorOperand &o_tensor, - const Operand &o_x, - const Operand &o_y, - const Operand &o_x_off, - const Operand &o_y_off) override - { - OperandUnpacker operands(_data->tiles, _data->arguments); - const IVectorTile *dst = operands.unpack(o_dst); - const IVectorTile *x = operands.unpack(o_x); - const IVectorTile *y = operands.unpack(o_y); - const IVectorTile *x_off = operands.unpack(o_x_off); - const IVectorTile *y_off = operands.unpack(o_y_off); - - TensorOperandUnpacker tensor_operands(_data->arguments); - IGpuTensorArgument *tensor = tensor_operands.unpack(o_tensor); - - assert(dst->format().w == 1); - assert(x->format().w == 1); - assert(y->format().w == 1); - assert(x_off->format().w == 1); - assert(y_off->format().w == 1); - assert(dst->format().dt == DataType::Int32); - assert(x->format().dt == DataType::Int32); - assert(y->format().dt == DataType::Int32); - assert(x_off->format().dt == DataType::Int32); - assert(y_off->format().dt == DataType::Int32); - - const std::string width = tensor->component(TensorComponentType::Dim1); - const std::string height = tensor->component(TensorComponentType::Dim2); - const std::string wxh = tensor->component(TensorComponentType::Dim1xDim2); - /* - int x_s; - int y_s; - x_s = (xi_0 + x_k); - y_s = (yi_0 + y_k); - mi_0 = x_s + y_s * width + b * widthxheight; - mi_0 = select(-1, mi_0, x_s >= 0); - mi_0 = select(-1, mi_0, y_s >= 0); - mi_0 = select(-1, mi_0, x_s < 128); - mi_0 = select(-1, mi_0, y_s < 128); - */ - compound_statement_begin(); - declare_tile("_x_s", TileInfo(DataType::Int32)); - declare_tile("_y_s", TileInfo(DataType::Int32)); - auto x_s = operands.unpack(Operand("_x_s")); - auto y_s = operands.unpack(Operand("_y_s")); - for (int i = 0; i < dst->format().h; ++i) - { - // x_s = (xi_0 + x_k); - // y_s = (yi_0 + y_k); - _data->code += x_s->scalar(0, i).str; - _data->code += " = ("; - _data->code += x->scalar(0, i).str; - _data->code += " + "; - _data->code += x_off->scalar(0, i).str; - _data->code += ");\n"; - _data->code += y_s->scalar(0, i).str; - _data->code += " = ("; - _data->code += y->scalar(0, i).str; - _data->code += " + "; - _data->code += y_off->scalar(0, i).str; - _data->code += ");\n"; - // mi_0 = x_s + y_s * width; - _data->code += dst->scalar(0, i).str; - _data->code += " = "; - _data->code += x_s->scalar(0, i).str; - _data->code += " + "; - _data->code += y_s->scalar(0, i).str; - _data->code += " * " + width + ";\n"; - // mi_0 = select(wxh, mi_0, x_s >= 0); - _data->code += dst->scalar(0, i).str; - _data->code += " = select(-1, "; - _data->code += dst->scalar(0, i).str; - _data->code += ", "; - _data->code += x_s->scalar(0, i).str; - _data->code += " >= 0);\n"; - // mi_0 = select(wxh, mi_0, x_s < width); - _data->code += dst->scalar(0, i).str; - _data->code += " = select(-1, "; - _data->code += dst->scalar(0, i).str; - _data->code += ", "; - _data->code += x_s->scalar(0, i).str; - _data->code += " < "; - _data->code += width + ");\n"; - // mi_0 = select(wxh, mi_0, y_s >= 0); - _data->code += dst->scalar(0, i).str; - _data->code += " = select(-1, "; - _data->code += dst->scalar(0, i).str; - _data->code += ", "; - _data->code += y_s->scalar(0, i).str; - _data->code += " >= 0);\n"; - // mi_0 = select(wxh, mi_0, y_s < height); - _data->code += dst->scalar(0, i).str; - _data->code += " = select(-1, "; - _data->code += dst->scalar(0, i).str; - _data->code += ", "; - _data->code += y_s->scalar(0, i).str; - _data->code += " < "; - _data->code += height + ");\n"; - } - compound_statement_end(); - } - -private: - GpuKernelWriterDataHolder *_data{nullptr}; - GpuKernelWriterAttribute *_attr{nullptr}; -}; - -/** IGpuKernelWriter factory class */ -class GpuKernelWriterFactory final -{ -public: - /** Static method to call the IGpuKernelWriter class accordingly with the Gpu programming language - * - * @param[in] gpu GPU target - * - * @return IGpuKernelWriter - */ - static std::unique_ptr<IGpuKernelWriter> create(GpuKernelWriterAttribute *attr, GpuKernelWriterDataHolder *x) - { - switch (x->programming_language()) - { - case GpuTargetLanguage::OpenCL: - return std::make_unique<ClKernelWriter>(attr, x); - default: - std::cout << "Unsupported Gpu programming language" << std::endl; - assert(false); - return nullptr; - } - } -}; - -inline int32_t -adjust_step(TensorSamplerFormat tensor_format, int32_t step, const TensorInfo *tensor_info_id, int32_t idx) -{ - auto tensor = tensor_info_id->shape; - - int32_t dim[3] = {0}; - - switch (tensor_format) - { - case TensorSamplerFormat::C_W_H: - dim[0] = tensor[0]; - dim[1] = tensor[1]; - dim[2] = tensor[2]; - break; - case TensorSamplerFormat::C_WH_1: - dim[0] = tensor[0]; - dim[1] = tensor[1] * tensor[2]; - dim[2] = 1; - break; - default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - break; - } - - return std::min(step, dim[idx]); -} - -} // namespace prototype -} // namespace ckw - -#endif // CKW_PROTOTYPE_SRC_PROTOTYPE_H diff --git a/compute_kernel_writer/prototype/src/TensorInfo.cpp b/compute_kernel_writer/prototype/src/TensorInfo.cpp deleted file mode 100644 index 561c126469..0000000000 --- a/compute_kernel_writer/prototype/src/TensorInfo.cpp +++ /dev/null @@ -1,77 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "ckw/TensorInfo.h" - -namespace ckw -{ -TensorInfo::TensorInfo(DataType dt, const TensorShape &shape, TensorDataLayout dl, int32_t id) - : _shape(shape), _dt(dt), _dl(dl), _id(id) -{ -} - -TensorInfo &TensorInfo::shape(const TensorShape &shape) -{ - _shape = shape; - return *this; -} - -TensorShape TensorInfo::shape() const -{ - return _shape; -} - -TensorInfo &TensorInfo::data_type(DataType dt) -{ - _dt = dt; - return *this; -} - -DataType TensorInfo::data_type() const -{ - return _dt; -} - -TensorInfo &TensorInfo::data_layout(TensorDataLayout dl) -{ - _dl = dl; - return *this; -} - -TensorDataLayout TensorInfo::data_layout() const -{ - return _dl; -} - -TensorInfo &TensorInfo::id(int32_t id) -{ - _id = id; - return *this; -} - -int32_t TensorInfo::id() const -{ - return _id; -} -} // namespace ckw diff --git a/compute_kernel_writer/prototype/src/TensorOperand.cpp b/compute_kernel_writer/prototype/src/TensorOperand.cpp deleted file mode 100644 index d1aefbbb71..0000000000 --- a/compute_kernel_writer/prototype/src/TensorOperand.cpp +++ /dev/null @@ -1,272 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "ckw/TensorOperand.h" - -#include "ckw/Error.h" -#include "ckw/Kernel.h" -#include "ckw/TensorInfo.h" -#include "ckw/TileOperand.h" - -#include "src/Prototype.h" - -namespace ckw -{ - -namespace -{ - -TensorComponentOperand &get_or_create_component(TensorOperand &tensor, - std::unique_ptr<TensorComponentOperand> &ptr, - TensorComponentType component) -{ - if (ptr == nullptr) - { - ptr = std::make_unique<TensorComponentOperand>(tensor, component); - } - - return *ptr; -} - -} // namespace - -// ================================================================================================= -// TensorOperand -// ================================================================================================= - -TensorOperand::TensorOperand(const std::string &name, const TensorInfo &info, TensorStorageType storage_type) - : OperandBase(name), _info(info), _storage_type(storage_type) -{ -} - -prototype::Operand TensorOperand::create_impl_operand(prototype::IGpuKernelWriter *writer) const -{ - CKW_UNUSED(writer); - return {name()}; -} - -const TensorInfo &TensorOperand::info() const -{ - return _info; -} - -TensorInfo &TensorOperand::info() -{ - return _info; -} - -TensorStorageType TensorOperand::storage_type() const -{ - return _storage_type; -} - -DataType TensorOperand::data_type() const -{ - return _info.data_type(); -} - -bool TensorOperand::is_constant() const -{ - return false; -} - -const TileOperand &TensorOperand::tile() const -{ - return *_tile; -} - -TileOperand &TensorOperand::tile() -{ - return *_tile; -} - -TensorOperand &TensorOperand::tile(TileOperand &tile) -{ - _tile = &tile; - return *this; -} - -const TensorTileSampler &TensorOperand::tile_sampler() const -{ - return _tile_sampler; -} - -TensorTileSampler &TensorOperand::tile_sampler() -{ - return _tile_sampler; -} - -TensorOperand &TensorOperand::tile_sampler(const TensorTileSampler &value) -{ - _tile_sampler = value; - return *this; -} - -TensorComponentOperand &TensorOperand::stride1() -{ - return get_or_create_component(*this, _stride1, TensorComponentType::Stride1); -} - -TensorComponentOperand &TensorOperand::stride2() -{ - return get_or_create_component(*this, _stride2, TensorComponentType::Stride2); -} - -TensorComponentOperand &TensorOperand::stride3() -{ - return get_or_create_component(*this, _stride3, TensorComponentType::Stride3); -} - -TensorComponentOperand &TensorOperand::stride4() -{ - return get_or_create_component(*this, _stride4, TensorComponentType::Stride4); -} - -TensorComponentOperand &TensorOperand::dim0() -{ - return get_or_create_component(*this, _dim0, TensorComponentType::Dim0); -} - -TensorComponentOperand &TensorOperand::dim1() -{ - return get_or_create_component(*this, _dim1, TensorComponentType::Dim1); -} - -TensorComponentOperand &TensorOperand::dim2() -{ - return get_or_create_component(*this, _dim2, TensorComponentType::Dim2); -} - -TensorComponentOperand &TensorOperand::dim3() -{ - return get_or_create_component(*this, _dim3, TensorComponentType::Dim3); -} - -TensorComponentOperand &TensorOperand::dim4() -{ - return get_or_create_component(*this, _dim4, TensorComponentType::Dim4); -} - -TensorComponentOperand &TensorOperand::dim1_dim2() -{ - return get_or_create_component(*this, _dim1_dim2, TensorComponentType::Dim1xDim2); -} - -TensorComponentOperand &TensorOperand::dim1_dim2_dim3() -{ - return get_or_create_component(*this, _dim1_dim2_dim3, TensorComponentType::Dim1xDim2xDim3); -} - -TensorComponentOperand &TensorOperand::offset_first_element_in_bytes() -{ - return get_or_create_component(*this, _offset_first_element_in_bytes, TensorComponentType::OffsetFirstElement); -} - -// ================================================================================================= -// TensorComponentOperand -// ================================================================================================= - -TensorComponentOperand::TensorComponentOperand(TensorOperand &tensor, TensorComponentType component) - : TileOperand(tensor.name(), DataType::Int32), _tensor(tensor), _component(component) -{ -} - -TensorOperand &TensorComponentOperand::tensor() -{ - return _tensor; -} - -const TensorOperand &TensorComponentOperand::tensor() const -{ - return _tensor; -} - -TensorComponentType TensorComponentOperand::component_type() const -{ - return _component; -} - -prototype::Operand TensorComponentOperand::create_impl_operand(prototype::IGpuKernelWriter *writer) const -{ - CKW_UNUSED(writer); - prototype::OperandType type{prototype::OperandType::Unknown}; - - switch (_component) - { - case TensorComponentType::OffsetFirstElement: - type = prototype::OperandType::TensorDataOffset; - break; - - case TensorComponentType::Stride1: - type = prototype::OperandType::TensorStride1; - break; - - case TensorComponentType::Stride2: - type = prototype::OperandType::TensorStride2; - break; - - case TensorComponentType::Stride3: - type = prototype::OperandType::TensorStride3; - break; - - case TensorComponentType::Stride4: - type = prototype::OperandType::TensorStride4; - break; - - case TensorComponentType::Dim0: - type = prototype::OperandType::TensorDim0; - break; - - case TensorComponentType::Dim1: - type = prototype::OperandType::TensorDim1; - break; - - case TensorComponentType::Dim2: - type = prototype::OperandType::TensorDim2; - break; - - case TensorComponentType::Dim3: - type = prototype::OperandType::TensorDim3; - break; - - case TensorComponentType::Dim4: - type = prototype::OperandType::TensorDim4; - break; - - case TensorComponentType::Dim1xDim2: - type = prototype::OperandType::TensorDim1xDim2; - break; - - case TensorComponentType::Dim1xDim2xDim3: - type = prototype::OperandType::TensorDim1xDim2xDim3; - break; - - default: - CKW_ASSERT(false); - } - - return prototype::Operand(name(), type); -} - -} // namespace ckw diff --git a/compute_kernel_writer/prototype/src/TensorTileSampler.cpp b/compute_kernel_writer/prototype/src/TensorTileSampler.cpp deleted file mode 100644 index bf9f946ce8..0000000000 --- a/compute_kernel_writer/prototype/src/TensorTileSampler.cpp +++ /dev/null @@ -1,191 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "ckw/TensorTileSampler.h" - -#include "ckw/TileOperand.h" -#include "ckw/types/TensorSamplerTypes.h" - -namespace ckw -{ - -TensorTileSampler::TensorTileSampler() -{ -} - -TensorTileSampler::TensorTileSampler(TileOperand &x, - TileOperand &y, - TileOperand &z, - TileOperand &b, - TensorSamplerFormat format, - TensorSamplerAddressModeX address_mode_x, - TensorSamplerAddressModeY address_mode_y, - TensorSamplerAddressModeZ address_mode_z) - : _x(&x), - _y(&y), - _z(&z), - _b(&b), - _height(0), - _width(0), - _format(format), - _address_mode_x(address_mode_x), - _address_mode_y(address_mode_y), - _address_mode_z(address_mode_z) -{ -} - -TensorTileSampler::TensorTileSampler(TileOperand &x, - TileOperand &y, - TileOperand &z, - TileOperand &b, - int32_t height, - int32_t width, - TensorSamplerFormat format, - TensorSamplerAddressModeX address_mode_x, - TensorSamplerAddressModeY address_mode_y, - TensorSamplerAddressModeZ address_mode_z) - : _x(&x), - _y(&y), - _z(&z), - _b(&b), - _height(height), - _width(width), - _format(format), - _address_mode_x(address_mode_x), - _address_mode_y(address_mode_y), - _address_mode_z(address_mode_z) -{ -} - -const TileOperand &TensorTileSampler::x() const -{ - return *_x; -} - -TensorTileSampler &TensorTileSampler::x(TileOperand &x) -{ - _x = &x; - return *this; -} - -const TileOperand &TensorTileSampler::y() const -{ - return *_y; -} - -TensorTileSampler &TensorTileSampler::y(TileOperand &y) -{ - _y = &y; - return *this; -} - -const TileOperand &TensorTileSampler::z() const -{ - return *_z; -} - -TensorTileSampler &TensorTileSampler::z(TileOperand &z) -{ - _z = &z; - return *this; -} - -const TileOperand &TensorTileSampler::b() const -{ - return *_b; -} - -TensorTileSampler &TensorTileSampler::b(TileOperand &b) -{ - _b = &b; - return *this; -} - -int32_t TensorTileSampler::width() const -{ - return _width; -} - -TensorTileSampler &TensorTileSampler::width(int32_t width) -{ - _width = width; - return *this; -} - -int32_t TensorTileSampler::height() const -{ - return _height; -} - -TensorTileSampler &TensorTileSampler::height(int32_t height) -{ - _height = height; - return *this; -} - -TensorSamplerFormat TensorTileSampler::format() const -{ - return _format; -} - -TensorTileSampler &TensorTileSampler::format(TensorSamplerFormat format) -{ - _format = format; - return *this; -} - -TensorSamplerAddressModeX TensorTileSampler::address_mode_x() const -{ - return _address_mode_x; -} - -TensorTileSampler &TensorTileSampler::address_mode_x(TensorSamplerAddressModeX address_mode_x) -{ - _address_mode_x = address_mode_x; - return *this; -} - -TensorSamplerAddressModeY TensorTileSampler::address_mode_y() const -{ - return _address_mode_y; -} - -TensorTileSampler &TensorTileSampler::address_mode_y(TensorSamplerAddressModeY address_mode_y) -{ - _address_mode_y = address_mode_y; - return *this; -} - -TensorSamplerAddressModeZ TensorTileSampler::address_mode_z() const -{ - return _address_mode_z; -} - -TensorTileSampler &TensorTileSampler::address_mode_z(TensorSamplerAddressModeZ address_mode_z) -{ - _address_mode_z = address_mode_z; - return *this; -} - -} // namespace ckw diff --git a/compute_kernel_writer/prototype/src/TileInfo.cpp b/compute_kernel_writer/prototype/src/TileInfo.cpp deleted file mode 100644 index 273266eedc..0000000000 --- a/compute_kernel_writer/prototype/src/TileInfo.cpp +++ /dev/null @@ -1,73 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "ckw/TileInfo.h" - -namespace ckw -{ -TileInfo::TileInfo(DataType dt) : _dt(dt), _shape({{1, 1}}) -{ -} - -TileInfo::TileInfo(DataType dt, int32_t w) : _dt(dt), _shape({{w, 1}}) -{ -} - -TileInfo::TileInfo(DataType dt, int32_t h, int32_t w) : _dt(dt), _shape({{w, h}}) -{ -} - -TileInfo &TileInfo::width(int32_t w) -{ - _shape[kTileWidthIdx] = w; - return *this; -} - -int32_t TileInfo::width() const -{ - return _shape[kTileWidthIdx]; -} - -TileInfo &TileInfo::height(int32_t h) -{ - _shape[kTileHeightIdx] = h; - return *this; -} - -int32_t TileInfo::height() const -{ - return _shape[kTileHeightIdx]; -} - -TileInfo &TileInfo::data_type(DataType dt) -{ - _dt = dt; - return *this; -} - -DataType TileInfo::data_type() const -{ - return _dt; -} -} // namespace ckw diff --git a/compute_kernel_writer/prototype/src/TileOperand.cpp b/compute_kernel_writer/prototype/src/TileOperand.cpp deleted file mode 100644 index e09c833d96..0000000000 --- a/compute_kernel_writer/prototype/src/TileOperand.cpp +++ /dev/null @@ -1,135 +0,0 @@ -/* - * Copyright (c) 2023 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ - -#include "ckw/TileOperand.h" - -#include "ckw/Error.h" - -#include "src/Prototype.h" - -namespace ckw -{ - -TileOperand::TileOperand(const std::string &name, const TileInfo &info) - : OperandBase(name), _info(info), _value{std::vector<std::string>{"0"}}, _constant(false) -{ -} - -TileOperand::TileOperand(const std::string &name, DataType data_type) - : OperandBase(name), _info(TileInfo{data_type}), _value{std::vector<std::string>{"0"}}, _constant(false) -{ -} - -TileOperand::TileOperand(const std::string &name, int32_t value) - : OperandBase(name), - _info(TileInfo{DataType::Int32}), - _value{std::vector<std::string>{std::to_string(value)}}, - _constant(true) -{ -} - -TileOperand::TileOperand(const std::string &name, float value) - : OperandBase(name), - _info(TileInfo{DataType::Fp32}), - _value{std::vector<std::string>{std::to_string(value)}}, - _constant(true) -{ -} - -TileOperand::TileOperand(const std::string &name, const TileContainer &vals, DataType dt) - : OperandBase(name), - _info(TileInfo{dt, static_cast<int32_t>(vals.size()), static_cast<int32_t>(vals[0].size())}), - _value(vals), - _constant(true) -{ -} - -prototype::Operand TileOperand::create_impl_operand(prototype::IGpuKernelWriter *writer) const -{ - CKW_UNUSED(writer); - - if (_constant) - { - if (is_scalar()) - { - switch (_info.data_type()) - { - case DataType::Int32: - return prototype::Operand(_value[0][0], prototype::OperandType::ScalarInt32); - - case DataType::Fp32: - return prototype::Operand(_value[0][0], prototype::OperandType::ScalarFp32); - - case DataType::Fp16: - return prototype::Operand(_value[0][0], prototype::OperandType::ScalarFp16); - - default: - CKW_ASSERT(false); - } - } - else - { - return prototype::Operand(name()); - } - } - else - { - return prototype::Operand(name(), prototype::OperandType::Tile); - } -} - -const TileInfo &TileOperand::tile_info() const -{ - return _info; -} - -DataType TileOperand::data_type() const -{ - return _info.data_type(); -} - -bool TileOperand::is_constant() const -{ - return _constant; -} - -bool TileOperand::is_scalar() const -{ - return _info.width() == 1 && _info.height() == 1; -} - -std::string TileOperand::scalar_value() const -{ - CKW_ASSERT(is_scalar()); - CKW_ASSERT(is_constant()); - - return _value[0][0]; -} - -const TileContainer &TileOperand::value() const -{ - return _value; -} - -} // namespace ckw |