author     Viet-Hoa Do <viet-hoa.do@arm.com>  2023-07-03 13:44:43 +0100
committer  Viet-Hoa Do <viet-hoa.do@arm.com>  2023-07-06 09:35:02 +0000
commit     ce3c48c7af02555f81c0f5e7ef2677916cecef34 (patch)
tree       a4bf4a6f46bd44655129bf03ee5771e56edd33bd /compute_kernel_writer/src
parent     9b392d7113aa181fdadbedcd4910e75ce23c0b3e (diff)
download   ComputeLibrary-ce3c48c7af02555f81c0f5e7ef2677916cecef34.tar.gz
Move CKW prototype to separate directory
Partially resolves: COMPMID-6283

Signed-off-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Change-Id: I7596e3dc357d6f0b9cbe66534523943a73c26d81
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9864
Reviewed-by: SiCong Li <sicong.li@arm.com>
Reviewed-by: Jakub Sujak <jakub.sujak@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'compute_kernel_writer/src')
-rw-r--r--  compute_kernel_writer/src/Kernel.cpp                      61
-rw-r--r--  compute_kernel_writer/src/KernelWriter.cpp               227
-rw-r--r--  compute_kernel_writer/src/OperandBase.cpp                 50
-rw-r--r--  compute_kernel_writer/src/Prototype.h                   3767
-rw-r--r--  compute_kernel_writer/src/TensorOperand.cpp              247
-rw-r--r--  compute_kernel_writer/src/TensorTileSampler.cpp          167
-rw-r--r--  compute_kernel_writer/src/TileOperand.cpp                104
-rw-r--r--  compute_kernel_writer/src/acl/AclComponentArgument.cpp    97
-rw-r--r--  compute_kernel_writer/src/acl/AclKernelWriter.cpp         50
-rw-r--r--  compute_kernel_writer/src/acl/AclScopedKernelWriter.cpp   58
10 files changed, 0 insertions, 4828 deletions
diff --git a/compute_kernel_writer/src/Kernel.cpp b/compute_kernel_writer/src/Kernel.cpp
deleted file mode 100644
index bbf5c440a7..0000000000
--- a/compute_kernel_writer/src/Kernel.cpp
+++ /dev/null
@@ -1,61 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ckw/Kernel.h"
-#include "ckw/Types.h"
-#include "src/Prototype.h"
-
-namespace ckw
-{
-
-Kernel::Kernel(const char *name, GpuTargetLanguage language)
- : _name(name), _kernel(std::make_unique<prototype::GpuKernelWriterDataHolder>(language)), _operands{}
-{
-}
-
-Kernel::~Kernel()
-{
-}
-
-const std::string &Kernel::name() const
-{
- return _name;
-}
-
-const std::map<std::string, std::unique_ptr<OperandBase>> &Kernel::operands() const
-{
- return _operands;
-}
-
-std::map<std::string, std::unique_ptr<OperandBase>> &Kernel::operands()
-{
- return _operands;
-}
-
-prototype::GpuKernelWriterDataHolder *Kernel::impl()
-{
- return _kernel.get();
-}
-
-} // namespace ckw
diff --git a/compute_kernel_writer/src/KernelWriter.cpp b/compute_kernel_writer/src/KernelWriter.cpp
deleted file mode 100644
index 5d79985e87..0000000000
--- a/compute_kernel_writer/src/KernelWriter.cpp
+++ /dev/null
@@ -1,227 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ckw/KernelWriter.h"
-#include "ckw/Error.h"
-#include "ckw/TensorOperand.h"
-#include "src/Prototype.h"
-
-#include <sstream>
-
-namespace ckw
-{
-
-namespace
-{
-
-inline prototype::TensorInfo create_impl_tensor_info(const TensorInfo &info)
-{
- return prototype::TensorInfo{ info.shape(), info.data_type(), info.data_layout(), info.id() };
-}
-
-} // namespace
-
-// =================================================================================================
-// Constructors and destructor
-// =================================================================================================
-
-KernelWriter::KernelWriter(Kernel &kernel)
- : _kernel(&kernel),
- _impl_attr(std::make_unique<prototype::GpuKernelWriterAttribute>()),
- _impl(prototype::GpuKernelWriterFactory::create(_impl_attr.get(), kernel.impl()))
-{
- _impl->set_IdSpace(1);
-}
-
-KernelWriter::~KernelWriter()
-{
-}
-
-// =================================================================================================
-// Scope management
-// =================================================================================================
-
-int32_t KernelWriter::id_space() const
-{
- return _id_space;
-}
-
-KernelWriter &KernelWriter::id_space(int32_t id_space)
-{
- CKW_ASSERT(id_space <= _max_id_space);
-
- _id_space = id_space;
- return *this;
-}
-
-int32_t KernelWriter::next_id_space()
-{
- id_space(++_max_id_space);
- return _id_space;
-}
-
-// =================================================================================================
-// Tensor and tile declaration
-// =================================================================================================
-
-TensorOperand &KernelWriter::create_tensor_argument(const char *name, const TensorInfo &info)
-{
- const auto var_name = generate_variable_name(name);
-
- _impl->declare_argument(var_name, create_impl_tensor_info(info));
-
- auto operand = new TensorOperand(var_name, info);
- register_operand(operand, false);
-
- return *operand;
-}
-
-TileOperand &KernelWriter::create_tile_argument(const char *name, int32_t value)
-{
- const auto var_name = generate_variable_name(name);
-
- auto operand = new TileOperand(var_name, value);
- register_operand(operand, false);
-
- return *operand;
-}
-
-std::string KernelWriter::generate_variable_name(const char *name) const
-{
- std::stringstream var_name;
-
- var_name << "_" << _id_space << "_" << name;
-
- return var_name.str();
-}
-
-void KernelWriter::register_operand(OperandBase *operand, bool declaring)
-{
- const auto &name = operand->name();
- auto &operands = _kernel->operands();
-
- CKW_ASSERT(operands.find(name) == operands.end());
- operands[name] = std::unique_ptr<OperandBase>(operand);
-
- if(declaring && !operand->is_constant())
- {
- const auto tile = reinterpret_cast<TileOperand *>(operand);
-
- const auto &info = tile->tile_info();
- _impl->declare_tile(tile->name(), prototype::TileInfo(info.data_type(), info.width(), info.height()));
- }
-}
-
-// =================================================================================================
-// Load and store
-// =================================================================================================
-
-void KernelWriter::op_load(TileOperand &tile, TensorOperand &tensor, const TensorTileSampler &sampler)
-{
- prototype::TensorOperand impl_tensor(
- tensor.name(),
- prototype::GpuSampler{
- sampler.format(),
- prototype::GpuSamplerTensorStorage::BufferUint8Ptr,
- sampler.address_mode_x(),
- sampler.address_mode_y(),
- sampler.address_mode_z() });
-
- auto impl_x = sampler.x().create_impl_operand(_impl.get());
- auto impl_y = sampler.y().create_impl_operand(_impl.get());
- auto impl_z = sampler.z().create_impl_operand(_impl.get());
- auto impl_b = sampler.b().create_impl_operand(_impl.get());
-
- auto impl_dst = tile.create_impl_operand(_impl.get());
-
- _impl->op_load_immediate(impl_tensor, impl_dst, impl_x, impl_y, impl_z, impl_b);
-}
-
-void KernelWriter::op_store(TensorOperand &tensor, const TileOperand &tile, const TensorTileSampler &sampler)
-{
- prototype::TensorOperand impl_tensor(
- tensor.name(),
- prototype::GpuSampler{
- sampler.format(),
- prototype::GpuSamplerTensorStorage::BufferUint8Ptr,
- sampler.address_mode_x(),
- sampler.address_mode_y(),
- sampler.address_mode_z() });
- auto impl_src = tile.create_impl_operand(_impl.get());
- auto impl_x = sampler.x().create_impl_operand(_impl.get());
- auto impl_y = sampler.y().create_impl_operand(_impl.get());
- auto impl_z = sampler.z().create_impl_operand(_impl.get());
- auto impl_b = sampler.b().create_impl_operand(_impl.get());
-
- _impl->op_store_immediate(impl_tensor, impl_src, impl_x, impl_y, impl_z, impl_b);
-}
-
-// =================================================================================================
-// Data processing
-// =================================================================================================
-
-void KernelWriter::op_assign(TileOperand &dst, const TileOperand &src)
-{
- auto impl_dst = dst.create_impl_operand(_impl.get());
- auto impl_src = src.create_impl_operand(_impl.get());
-
- _impl->op_assign(impl_dst, impl_src);
-}
-
-void KernelWriter::op_binary_expression(TileOperand &dst, const TileOperand &lhs, const TileOperand &rhs, BinaryOp op)
-{
- auto impl_lhs = lhs.create_impl_operand(_impl.get());
- auto impl_rhs = rhs.create_impl_operand(_impl.get());
- auto impl_dst = dst.create_impl_operand(_impl.get());
-
- _impl->op_binary_expression(impl_dst, impl_lhs, op, impl_rhs);
-}
-
-void KernelWriter::op_scalar_function(TileOperand &dst, const TileOperand &src, ScalarUnaryFunction opcode)
-{
- auto impl_dst = dst.create_impl_operand(_impl.get());
- auto impl_src = src.create_impl_operand(_impl.get());
-
- _impl->op_scalar_function(impl_dst, impl_src, opcode);
-}
-
-// =================================================================================================
-// Misc
-// =================================================================================================
-
-void KernelWriter::op_get_global_id(TileOperand &dst, int32_t dim)
-{
- _impl->op_get_global_id(prototype::Operand(dst.name()), dim);
-}
-
-// =================================================================================================
-// Code generation
-// =================================================================================================
-
-std::string KernelWriter::generate_code()
-{
- return prototype::generate_code(*_kernel->impl(), _kernel->name());
-}
-
-} // namespace ckw
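For context, a minimal sketch (not from the original sources) of how the removed prototype API composed: a Kernel owns the operands, a KernelWriter records operations on tile operands, and generate_code() emits the OpenCL source. Only methods visible in the file above are used; the helper name, the header paths, and the way the TileOperand references are obtained are assumptions.

#include "ckw/Kernel.h"
#include "ckw/KernelWriter.h"

#include <string>

// Hypothetical helper: 'gid', 'dst', 'lhs' and 'rhs' are assumed to be tile
// operands already registered with the kernel elsewhere.
std::string emit_add_kernel(ckw::Kernel &kernel, ckw::TileOperand &gid, ckw::TileOperand &dst,
                            const ckw::TileOperand &lhs, const ckw::TileOperand &rhs)
{
    ckw::KernelWriter writer(kernel);

    writer.op_get_global_id(gid, 0);                                // gid = get_global_id(0)
    writer.op_binary_expression(dst, lhs, rhs, ckw::BinaryOp::Add); // dst = lhs + rhs

    return writer.generate_code();                                  // emit the OpenCL source
}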
diff --git a/compute_kernel_writer/src/OperandBase.cpp b/compute_kernel_writer/src/OperandBase.cpp
deleted file mode 100644
index 59cf846cc7..0000000000
--- a/compute_kernel_writer/src/OperandBase.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ckw/OperandBase.h"
-
-namespace ckw
-{
-
-OperandBase::OperandBase(const std::string &name)
- : _name(name)
-{
-}
-
-OperandBase::~OperandBase()
-{
-}
-
-const std::string &OperandBase::name() const
-{
- return _name;
-}
-
-OperandBase &OperandBase::name(const std::string &name)
-{
- _name = name;
- return *this;
-}
-
-} // namespace ckw
diff --git a/compute_kernel_writer/src/Prototype.h b/compute_kernel_writer/src/Prototype.h
deleted file mode 100644
index 45f1b3d464..0000000000
--- a/compute_kernel_writer/src/Prototype.h
+++ /dev/null
@@ -1,3767 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef CKW_SRC_PROTOTYPE_H
-#define CKW_SRC_PROTOTYPE_H
-
-#include <vector>
-#include <map>
-#include <string>
-#include <cstdint> // int32_t
-#include <iostream> // cout (to be removed)
-#include <cassert> // assert (to be removed)
-#include <unordered_map>
-#include <chrono>
-#include <cmath>
-#include <memory>
-#include <algorithm>
-#include <array>
-#include <stdexcept>
-
-#include "ckw/Types.h"
-#include "ckw/TensorInfo.h"
-#include "ckw/Error.h"
-
-namespace ckw
-{
-namespace prototype {
-
-// Dummy data structure for Size2D
-using Size2D = std::vector<int32_t>;
-
-// Dummy Status
-using Status = void;
-
-enum class ComponentType : int32_t
-{
- Complex = 0,
- Simple = 1,
- Unfusable = 2
-};
-
-enum class GpuCompilationSpeed
-{
- Fast = 0x00, // fast compilation may increase the latency of the network
- Slow = 0x01 // slow compilation may decrease the latency of the network
-};
-
-enum class GpuExtensions
-{
- Fp16,
- Dot8,
- Mmul,
- FastMath
-};
-
-struct TensorInfo
-{
- TensorShape shape { {0} };
- DataType data_type { DataType::Unknown };
- TensorDataLayout data_layout { TensorDataLayout::Nhwc };
- int32_t id { -1 };
-};
-
-struct ComponentAttribute
-{
- GpuCompilationSpeed compilation_speed {GpuCompilationSpeed::Fast};
- bool overwrite_tile { true };
-};
-
-inline std::string data_type_to_cl_type(DataType dt)
-{
- switch(dt)
- {
- case DataType::Fp32:
- return "float";
- case DataType::Fp16:
- return "half";
- case DataType::Int8:
- return "char";
- case DataType::Uint8:
- return "uchar";
- case DataType::Uint16:
- return "ushort";
- case DataType::Int16:
- return "short";
- case DataType::Uint32:
- return "uint";
- case DataType::Int32:
- return "int";
- case DataType::Bool:
- return "bool";
- default:
- assert(false);
- return "";
- }
-}
-
-inline int32_t width_to_cl_vector_size(int32_t width)
-{
- switch(width)
- {
- case 1:
- return 1;
- case 2:
- return 2;
- case 3:
- return 3;
- case 4:
- return 4;
- case 5:
- case 6:
- case 7:
- case 8:
- return 8;
- case 9:
- case 10:
- case 11:
- case 12:
- case 13:
- case 14:
- case 15:
- case 16:
- return 16;
- default:
- assert(false);
- return 0;
- }
-}
-
-inline std::string get_cl_data_type(DataType dt, int32_t width)
-{
- std::string data_type;
- int32_t w = width_to_cl_vector_size(width);
- data_type += data_type_to_cl_type(dt);
- if(w != 1)
- {
- data_type += std::to_string(w);
- }
- return data_type;
-}
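For illustration only (not part of the original header): the helpers above map a data type plus a requested width to an OpenCL type name, rounding the width up to the next supported vector size. A hypothetical check:

#include <cassert>

inline void cl_type_examples() // hypothetical name, for illustration
{
    assert(get_cl_data_type(DataType::Fp16, 3) == "half3");  // supported widths are kept as-is
    assert(get_cl_data_type(DataType::Fp32, 5) == "float8"); // widths 5..8 round up to 8
    assert(get_cl_data_type(DataType::Int32, 1) == "int");   // width 1 gets no suffix
}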
-
-inline std::string to_opencl_store(int32_t vector_length)
-{
- if(vector_length != 1)
- {
- return "vstore" + std::to_string(vector_length) + "(";
- }
- else
- {
- return "*(";
- }
-}
-
-struct TileInfo
-{
- TileInfo() {}
- TileInfo(DataType dt) : dt(dt), w(1), h(1) {}
- TileInfo(DataType dt, int32_t width) : dt(dt), w(width), h(1) {}
- TileInfo(DataType dt, int32_t width, int32_t height) : dt(dt), w(width), h(height) {}
- DataType dt{ DataType::Unknown }; // Data type of the tile
- int32_t w{ 0 }; // Width (i.e. c0 - portion of the channels)
- int32_t h{ 0 }; // Height (i.e. s0 - portion of the spatial dimensions)
-};
-
-inline std::ostream& operator << (std::ostream& o, const TileInfo& a)
-{
- o << a.w << " x " << a.h;
- return o;
-}
-
-struct DataTypeAsString
-{
- std::string str { "" };
- DataType dt { DataType::Unknown };
- int32_t size { 1 };
-};
-
-struct ValueAsString
-{
- std::string str { "" };
- DataTypeAsString type { };
-};
-
-// https://stackoverflow.com/questions/51515378/storing-and-accessing-tile-properties-in-c
-// A Tile is a collection of variables used to express 2D data.
-class IScalarTile
-{
-public:
- virtual ~IScalarTile() = default;
- /** Method to get the scalar variable from a tile
- * @param[in] x X coordinate on the width of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge
- * @param[in] y Y coordinate on the height of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge
- *
- * @return the scalar variable as a string
- */
- virtual ValueAsString scalar(int32_t x, int32_t y) const = 0;
- /** Method to get the list of underlying variable names used by the tile
- *
- * @return the list of variable names
- */
- virtual std::vector<ValueAsString> underlying_source_variables() const = 0;
- /** Method to get the name of the tile.
- *
- * @return the name of the tile
- */
- std::string name() const
- {
- return _basename;
- }
- /** Method to get the tile format
- *
- * @return the format
- */
- TileInfo format() const
- {
- return _format;
- }
- /** Method to know whether the tile is assignable or not (constant)
- *
- * @return true if the tile is assignable
- */
- virtual bool is_assignable() const = 0;
- /** Method to know whether the tile needs to be declared
- *
- * @return true if the tile needs to be declared in the code before being used
- */
- virtual bool need_declaration() const = 0;
-protected:
- TileInfo _format { }; // Tile format
- std::string _basename { "" }; // Tile name
-};
-
-// A tile is a collection of variables used to express 2D data. In the GPU context, the variables are vectors.
-// The vector size is given by the width of the tile, and the number of vectors by the height of the tile.
-class IVectorTile : public IScalarTile
-{
-public:
- virtual ~IVectorTile() = default;
- /** Method to get the vector variable from a tile. A vector is an ordered homogeneous collection of two or more scalars.
- * The user can query the list of supported width for the vectors through preferred_vector_sizes().
- *
- * @param[in] y Y coordinate on the height of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge
- *
- * @return the vector variable as a string
- */
- virtual ValueAsString vector(int32_t y) const = 0;
- /** Method to get a vector variable from a tile. A vector is an ordered homogeneous collection of two or more scalars.
- *
- * @return the vector variable as a string
- */
- virtual ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const = 0;
- /** Method to get the preferred vector sizes.
- *
- * @return a vector with the preferred vector sizes
- */
- //virtual std::vector<int32_t> preferred_vector_sizes() const = 0;
-};
-
-class ClTile : public IVectorTile
-{
-public:
- ClTile(const std::string& name, TileInfo format)
- {
- _format = format;
- _basename = name;
- }
-
- ValueAsString scalar(int32_t x, int32_t y) const override
- {
- x = std::max(std::min(x, _format.w - 1), static_cast<int32_t>(0));
- y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
-
- ValueAsString t;
- t.str = build_variable_name(y);
- t.type.str = get_cl_data_type(_format.dt, 1);
- t.type.dt = _format.dt;
- t.type.size = 1;
-
- // Check required because if the width has only one element, we cannot use .s0
- if(_format.w != 1)
- {
- // Automatic broadcasting
- t.str += ".s" + std::to_string(x);
- }
-
- return t;
- }
-
- ValueAsString vector(int32_t y) const override
- {
- y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
-
- ValueAsString t;
- t.str = build_variable_name(y);
- t.type.str = get_cl_data_type(_format.dt, _format.w);
- t.type.dt = _format.dt;
- t.type.size = _format.w;
- return t;
- }
-
- ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const override
- {
- y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
-
- ValueAsString t;
- t.str = build_variable_name(y);
- t.type.str = get_cl_data_type(_format.dt, width);
- t.type.dt = _format.dt;
- t.type.size = width;
-
- if(_format.w != 1)
- {
- t.str += ".s";
- for(int i = 0; i < width; ++i)
- {
- t.str += to_scalar_hex(x_start + i);
- }
- }
- return t;
- }
-
- std::vector<ValueAsString> underlying_source_variables() const override
- {
- std::vector<ValueAsString> vars;
- for(int32_t y = 0; y < _format.h; ++y)
- {
- ValueAsString t;
- t.str = build_variable_name(y);
- t.type.str = get_cl_data_type(_format.dt, _format.w);
- t.type.dt = _format.dt;
- t.type.size = _format.w;
- vars.push_back(t);
- }
- return vars;
- }
-
- bool is_assignable() const override
- {
- return true;
- }
-
- bool need_declaration() const override
- {
- return true;
- }
-
-private:
- std::string build_variable_name(int32_t y) const
- {
- std::string var_name = _basename;
-
- if(_format.h == 1)
- {
- return var_name;
- }
- else
- {
- var_name += "_";
- var_name += std::to_string(y);
- }
-
- return var_name;
- }
-
- std::string to_scalar_hex(int32_t x) const
- {
- switch(x)
- {
- case 0:
- case 1:
- case 2:
- case 3:
- case 4:
- case 5:
- case 6:
- case 7:
- case 8:
- case 9:
- return std::to_string(x);
- case 10:
- return "A";
- case 11:
- return "B";
- case 12:
- return "C";
- case 13:
- return "D";
- case 14:
- return "E";
- case 15:
- return "F";
- default:
- std::cout << "Unsupported hexadecimal value" << std::endl;
- assert(false);
- return "";
- }
- }
-};
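For illustration only (not part of the original header): with the ClTile definition above, a 4 x 2 fp32 tile named "dst" is backed by two float4 variables, dst_0 and dst_1, and individual elements are addressed with the OpenCL .sN selectors. A hypothetical sketch:

#include <cassert>

inline void cl_tile_examples() // hypothetical name, for illustration
{
    ClTile tile("dst", TileInfo(DataType::Fp32, 4, 2)); // width 4, height 2

    assert(tile.vector(1).str == "dst_1");           // row 1 is the variable "dst_1"
    assert(tile.vector(1).type.str == "float4");     // each row is a float4 vector
    assert(tile.scalar(2, 1).str == "dst_1.s2");     // element at (x = 2, y = 1)
    assert(tile.vector(1, 2, 0).str == "dst_0.s12"); // 2 elements of row 0, starting at x = 1
}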
-
-// Unique feature: it stores its values as strings. The name of this class is somewhat misleading, since the strings it holds may be variable names whose values change over time.
-class ClConstantTile : public IVectorTile
-{
-public:
- ClConstantTile(const std::vector<std::vector<std::string>> &in, DataType dt)
- {
- _format.w = in[0].size();
- _format.h = in.size();
- _format.dt = dt;
-
- _data = std::vector<std::vector<std::string>>(_format.h, std::vector<std::string>(_format.w));
-
- for(int32_t y = 0; y < _format.h; ++y)
- {
- for(int32_t x = 0; x < _format.w; ++x)
- {
- _data[y][x] = in[y][x];
- }
- }
- }
-
- ValueAsString scalar(int32_t x, int32_t y) const override
- {
- x = std::max(std::min(x, _format.w - 1), static_cast<int32_t>(0));
- y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
-
- ValueAsString t;
- t.str = _data[y][x];
- t.type.str = get_cl_data_type(_format.dt, 1);
- t.type.dt = _format.dt;
- t.type.size = 1;
-
- return t;
- }
-
- ValueAsString vector(int32_t y) const override
- {
- y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
-
- return vector(0, _format.w, y);
- }
-
- ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const override
- {
- y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
-
- ValueAsString t;
- t.str = "";
- t.type.str = get_cl_data_type(_format.dt, width);
- t.type.dt = _format.dt;
- t.type.size = width;
-
- if(width > 1)
- {
- t.str += "((" + get_cl_data_type(_format.dt, width) + ")(";
- }
-
- int32_t x = x_start;
- for(; x < x_start + width - 1; ++x)
- {
- t.str += scalar(x, y).str;
- t.str += ", ";
- }
- t.str += scalar(x, y).str;
-
- if(width > 1)
- {
- t.str += "))";
- }
-
- return t;
- }
-
- std::vector<ValueAsString> underlying_source_variables() const override
- {
- std::vector<ValueAsString> vars;
-
- for(int32_t y = 0; y < _format.h; ++y)
- {
- for(int32_t x = 0; x < _format.w; ++x)
- {
- ValueAsString t;
- t.str = _data[y][x];
- t.type.str = get_cl_data_type(_format.dt, 1);
- t.type.dt = _format.dt;
- t.type.size = 1;
- vars.push_back(t);
- }
- }
-
- return vars;
- }
-
- bool is_assignable() const override
- {
- return false;
- }
-
- bool need_declaration() const override
- {
- return false;
- }
-
-private:
- std::vector<std::vector<std::string>> _data{};
-};
-
-enum class TensorComponentIndex : int32_t
-{
- IndexMask = 0x0000000f,
-};
-
-enum class TensorComponentType : int32_t
-{
- OffsetFirstElement = 0x00000100,
- Stride = 0x00001000,
- Dimension = 0x00010000,
- FoldedDimension = 0x00100000,
- Constant = 0x01000000
-};
-
-enum class TensorComponent : int32_t
-{
- Unknown = 0x00000000,
- OffsetFirstElement = 0x00000100,
- Stride1 = 0x00001001,
- Stride2 = 0x00001002,
- Stride3 = 0x00001003,
- Stride4 = 0x00001004,
- Dim0 = 0x00010000,
- Dim1 = 0x00010001,
- Dim2 = 0x00010002,
- Dim3 = 0x00010003,
- Dim4 = 0x00010004,
- C = 0x00010000, // Dim0
- W = 0x00010001, // Dim1
- H = 0x00010002, // Dim2
- D = 0x00010003,
- N = 0x00010004,
- Dim1xDim2 = 0x00100021,
- Dim1xDim2xDim3 = 0x00100321,
- WxH = 0x00100021,
- WxHxD = 0x00100321
-};
-
-inline std::string to_string(TensorComponent x)
-{
- switch(x)
- {
- case TensorComponent::Unknown:
- return "Unknown";
- case TensorComponent::OffsetFirstElement:
- return "OffsetFirstElement";
- case TensorComponent::Stride1:
- return "Stride1";
- case TensorComponent::Stride2:
- return "Stride2";
- case TensorComponent::Stride3:
- return "Stride3";
- case TensorComponent::Stride4:
- return "Stride4";
- case TensorComponent::Dim0:
- return "Dim0";
- case TensorComponent::Dim1:
- return "Dim1";
- case TensorComponent::Dim2:
- return "Dim2";
- case TensorComponent::Dim3:
- return "Dim3";
- case TensorComponent::Dim4:
- return "Dim4";
- case TensorComponent::Dim1xDim2:
- return "Dim1xDim2";
- case TensorComponent::Dim1xDim2xDim3:
- return "Dim1xDim2xDim3";
- default:
- assert(false);
- return "";
- }
-}
-
-class ITensorArgument
-{
-public:
- virtual ~ITensorArgument() = default;
- /** Method to get the tensor component as a string
- *
- * @param[in] x tensor component to query
- *
- * @return the tensor component as a string
- */
- virtual std::string component(TensorComponent x) = 0;
- /** Method to get the tensor component type declaration as a string
- *
- * @return the tensor component type declaration as a string
- */
- virtual std::string component_type_declaration() const = 0;
- /** Method to get the tensor component data type
- *
- * @return the tensor component data type
- */
- virtual DataType component_data_type() const = 0;
- /** Method to get the tensor component declarations
- *
- * @return a vector containing the tensor component declarations
- */
- virtual std::vector<TensorComponent> component_declarations() const = 0;
- /** Method to get the name of the tensor argument.
- *
- * @return the name of the tensor argument
- */
- std::string name() const
- {
- return _basename;
- }
- /** Method to get the tensor format
- *
- * @return the format
- */
- TensorInfo format() const
- {
- return _format;
- }
-
-protected:
- TensorInfo _format { };
- std::string _basename {};
-};
-
-enum class GpuTensorStorage : int32_t
-{
- Unknown = 0x0000,
- BufferUint8Ptr = 0x0012,
- Image2dReadOnly = 0x0020,
- Image2dWriteOnly = 0x0021,
- Image3dReadOnly = 0x0030,
- Image3dWriteOnly = 0x0031
-};
-
-class IGpuTensorArgument : public ITensorArgument
-{
-public:
- virtual ~IGpuTensorArgument() = default;
- /** Method to get the tensor storage, which is the underlying memory used to store the tensor data
- *
- * @param[in] x tensor storage to query
- *
- * @return the tensor storage as a string
- */
- virtual std::string storage(GpuTensorStorage x) = 0;
- /** Method to get the tensor storage type declaration as a string
- *
- * @param[in] x tensor storage to query
- *
- * @return the tensor storage type declaration as a string
- */
- virtual std::string storage_type_declaration(GpuTensorStorage x) const = 0;
- /** Method to get the tensor storage declarations
- *
- * @return a vector containing the tensor storage declarations
- */
- virtual std::vector<GpuTensorStorage> storage_declarations() const = 0;
-};
-
-class ClTensorArgument : public IGpuTensorArgument
-{
-public:
- ClTensorArgument(const std::string& name, const TensorInfo& x, bool return_by_value_when_possible)
- {
- _basename = name;
- _format = x;
- _return_by_value_when_possible = return_by_value_when_possible;
- }
-
- // Methods to override
- std::string component(TensorComponent x) override
- {
- if((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentType::Constant)))
- {
- int32_t idx = static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentIndex::IndexMask);
- return std::to_string(idx - 1);
- }
-
- if(_return_by_value_when_possible)
- {
- if((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentType::Dimension)))
- {
- int32_t idx = static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentIndex::IndexMask);
- return std::to_string(_format.shape[idx]);
- }
-
- if((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentType::FoldedDimension)))
- {
- switch(x)
- {
- case TensorComponent::Dim1xDim2:
- return std::to_string(_format.shape[1] * _format.shape[2]);
- case TensorComponent::Dim1xDim2xDim3:
- return std::to_string(_format.shape[1] * _format.shape[2] * _format.shape[3]);
- default:
- std::cout << "Unsupported folded dimension" << std::endl;
- assert(false);
- }
- }
- }
-
- if(std::find(_components_required.begin(), _components_required.end(), x) == _components_required.end())
- {
- _components_required.push_back(x);
- }
-
- return build_component_name(x);
- }
-
- std::string component_type_declaration() const override
- {
- return "int";
- };
-
- DataType component_data_type() const override
- {
- return DataType::Int32;
- }
-
- std::string storage(GpuTensorStorage x) override
- {
- if(std::find(_storage_required.begin(), _storage_required.end(), x) == _storage_required.end())
- {
- _storage_required.push_back(x);
- }
-
- return build_storage_name(x);
- }
-
- std::string storage_type_declaration(GpuTensorStorage x) const override
- {
- switch(x)
- {
- case GpuTensorStorage::BufferUint8Ptr:
- return "__global uchar*";
- case GpuTensorStorage::Image2dReadOnly:
- return "__read_only image2d_t";
- case GpuTensorStorage::Image2dWriteOnly:
- return "__write_only image2d_t";
- case GpuTensorStorage::Image3dReadOnly:
- return "__read_only image3d_t ";
- case GpuTensorStorage::Image3dWriteOnly:
- return "__write_only image3d_t ";
- default:
- std::cout << "Unsupported storage" << std::endl;
- assert(false);
- return "";
- }
- };
-
- std::vector<GpuTensorStorage> storage_declarations() const override
- {
- return _storage_required;
- }
-
- std::vector<TensorComponent> component_declarations() const override
- {
- return _components_required;
- }
-
-private:
- std::string build_storage_name(GpuTensorStorage x) const
- {
- std::string var_name = _basename;
-
- switch(x)
- {
- case GpuTensorStorage::BufferUint8Ptr:
- return var_name + "_ptr";
- case GpuTensorStorage::Image2dReadOnly:
- case GpuTensorStorage::Image2dWriteOnly:
- return var_name + "_img2d";
- case GpuTensorStorage::Image3dReadOnly:
- case GpuTensorStorage::Image3dWriteOnly:
- return var_name + "_img3d";
- default:
- std::cout << "Unsupported storage" << std::endl;
- assert(false);
- }
-
- return var_name;
- }
-
- std::string build_component_name(TensorComponent x) const
- {
- std::string var_name = _basename;
-
- switch(x)
- {
- case TensorComponent::OffsetFirstElement:
- return var_name + "_offset_first_element";
- case TensorComponent::Stride1:
- return var_name + "_stride1";
- case TensorComponent::Stride2:
- return var_name + "_stride2";
- case TensorComponent::Stride3:
- return var_name + "_stride3";
- case TensorComponent::Dim0:
- return var_name + "_dim0";
- case TensorComponent::Dim1:
- return var_name + "_dim1";
- case TensorComponent::Dim2:
- return var_name + "_dim2";
- case TensorComponent::Dim3:
- return var_name + "_dim3";
- case TensorComponent::Dim1xDim2:
- return var_name + "_dim1xdim2";
- case TensorComponent::Dim1xDim2xDim3:
- return var_name + "_dim1xdim2xdim3";
- default:
- std::cout << "Unsupported component" << std::endl;
- assert(false);
- }
-
- return var_name;
- }
-
- bool _return_by_value_when_possible { false };
- std::vector<GpuTensorStorage> _storage_required {};
- std::vector<TensorComponent> _components_required {};
-};
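For illustration only (not part of the original header): querying a component or a storage on the tensor argument above returns the variable name to use in the kernel and, as a side effect, records it as a declaration the generated kernel will need. A hypothetical sketch, with the TensorInfo assumed to be built elsewhere:

#include <cassert>

inline void cl_tensor_argument_example(const TensorInfo &info) // hypothetical, for illustration
{
    ClTensorArgument arg("src", info, /* return_by_value_when_possible */ false);

    const std::string stride1 = arg.component(TensorComponent::Stride1);       // "src_stride1"
    const std::string buffer  = arg.storage(GpuTensorStorage::BufferUint8Ptr); // "src_ptr"

    // Both queries are now tracked as required kernel arguments.
    assert(arg.component_declarations().size() == 1);
    assert(arg.storage_declarations().size() == 1);
}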
-
-/**
- * @brief Data structure that contains the tiles declared by the components.
- * The registry is a linear data structure that behaves like a stack. The user can call the @p increment_registry_level() method to
- * increase the level of the stack (0 when it starts). When the user calls the @p decrement_registry_level() method, the registry decreases the level of the stack
- * and removes (pops) all the tiles declared at the current level.
- * When a tile is declared at level 0, it is a global tile. A global tile is visible in all parts of the code.
- * Since different components may use the same name to define a tile, the registry adopts the IdSpace concept, an @p id to prevent name collisions
- * when declaring tiles among different components.
- *
- */
-class GpuTileRegistry
-{
-public:
-enum class RegistryTileType
-{
- Tile,
- Link
-};
-
-using RegistryIdSpace = int32_t;
-using RegistryLevel = int32_t;
-using RegistryTileName = std::string;
-
-struct RegistryTileTableEntry
-{
- RegistryLevel registry_level { 0 };
- std::unique_ptr<IVectorTile> tile_object { nullptr };
-};
-
-struct RegistryTileTypeTableEntry
-{
- RegistryTileType tile_type { RegistryTileType::Tile };
- RegistryTileName tile_name {};
- RegistryIdSpace registry_idspace { 0 };
- RegistryLevel registry_level { 0 };
-};
-
-using RegistryTileTable = std::map<RegistryIdSpace, std::map<RegistryTileName, RegistryTileTableEntry>>;
-using RegistryTileTypeTable = std::map<RegistryIdSpace, std::map<RegistryTileName, RegistryTileTypeTableEntry>>;
- /**
- * @brief Construct a new Gpu Tile Registry object
- *
- */
- GpuTileRegistry()
- {
- _language = GpuTargetLanguage::Unknown;
- }
- /**
- * @brief Construct a new Gpu Tile Registry object providing the Gpu programming language
- *
- * @param[in] language Gpu programming language to use
- */
- GpuTileRegistry(GpuTargetLanguage language)
- {
- _language = language;
- }
- /**
- * @brief Default destructor. Destroy the Gpu Tile Registry object
- *
- */
- ~GpuTileRegistry() = default;
- /**
- * @brief Set the working IdSpace for the tile registry. IdSpace is used to prevent name collisions when declaring tiles.
- * Therefore, the IdSpace should be set before declaring any tiles.
- *
- * @param[in] id The IdSpace id
- */
- void set_IdSpace(int32_t id)
- {
- _IdSpace = id;
- }
- /**
- * @brief Get the current working IdSpace for the tile registry. IdSpace is used to prevent name collisions when declaring tiles
- *
- * @return The IdSpace id
- */
- int32_t IdSpace() const
- {
- return _IdSpace;
- }
- /**
- * @brief Gets all the IdSpace declarations defined in the tile registry.
- *
- * @return all the IdSpace declarations defined in the tile registry as std::vector<int32_t>. It returns an empty vector if there are no IdSpace declarations.
- */
- std::vector<int32_t> IdSpace_declarations() const
- {
- std::vector<int32_t> x;
-
- auto it = _frags.begin();
-
- while (it != _frags.end())
- {
- x.push_back(it->first);
-
- it++;
- }
-
- return x;
- }
- /**
- * @brief Declare a tile from a previously created tile
- */
- void insert(const std::string& name, const IVectorTile *frag)
- {
- assert(_language == GpuTargetLanguage::OpenCL);
- const int32_t key_IdSpace = _IdSpace;
- const std::string key_var_name = name;
- const std::string var_name = frag->name();
- TileInfo format = frag->format();
-
- // First check whether a tile with the same name exists
- IVectorTile *result = (*this)[key_var_name];
- assert(result == nullptr);
- if(result == nullptr)
- {
- std::unique_ptr<ClTile> tile = std::make_unique<ClTile>(var_name, format);
-
- _frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
- _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
-
- _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Link;
- _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name;
- _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
- _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level;
- }
- }
- /**
- * @brief Declare a tile with TileInfo. The tile will be stored in the IdSpace set with @p set_IdSpace()
- *
- * @note The reference name used for declaring the tile should not be previously used in the IdSpace
- *
- * @param[in] name Reference name for the tile. The reference name can be used to retrieve the tile stored in the registry.
- * @param[in] format Tile format to use
- */
- void insert(const std::string& name, const TileInfo& format)
- {
- assert(_language == GpuTargetLanguage::OpenCL);
- const int32_t key_IdSpace = _IdSpace;
- const std::string key_var_name = name;
- const std::string var_name = generate_tile_name(name);
-
- // First check whether a tile with the same name exists
- IVectorTile *result = (*this)[key_var_name];
- assert(result == nullptr);
- if(result == nullptr)
- {
- std::unique_ptr<ClTile> tile = std::make_unique<ClTile>(var_name, format);
- _frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
- _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
-
- _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile;
- _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name;
- _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
- _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level;
- }
- }
- /**
- * @brief Declare a constant tile. The content of the tile is passed as a vector of std::string
- *
- * @note The reference name used for declaring the tile should not be previously used in the IdSpace
- *
- * @param[in] name Reference name for the tile. The reference name can be used to retrieve the tile stored in the registry.
- * @param[in] in A 2D std::vector of std::string. The dimensions of the tile are deduced from the 2D std::vector
- * @param[in] dt The data type of the elements stored as std::string in the 2D std::vector. It is the user's responsibility to ensure
- * that the data type is consistent with the content of the std::string.
- */
- void insert(const std::string& name, const std::vector<std::vector<std::string>>& in, DataType dt)
- {
- assert(_language == GpuTargetLanguage::OpenCL);
- const int32_t key_IdSpace = _IdSpace;
- const std::string key_var_name = name;
-
- // First check whether a tile with the same name exists
- IVectorTile *result = (*this)[key_var_name];
- assert(result == nullptr);
- if(result == nullptr)
- {
- std::unique_ptr<ClConstantTile> tile = std::make_unique<ClConstantTile>(in, dt);
- _frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
- _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
-
- _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile;
- _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name;
- _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
- _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level;
- }
- }
- /**
- * @brief Declare an anonymous constant tile. The content of the tile is passed as a vector of std::string
- *
- * @note This method can be used to declare temporary tiles that need to be accessed only once.
- *
- * @param[in] in A 2D std::vector of std::string. The dimensions of the tile are deduced from the 2D std::vector
- * @param[in] dt The data type of the elements stored as std::string in the 2D std::vector. It is the user's responsibility to ensure
- * that the data type is consistent with the content of the std::string.
- *
- * @return IVectorTile* the anonymous constant tile
- */
- IVectorTile* insert(const std::vector<std::vector<std::string>>& in, DataType dt)
- {
- assert(_language == GpuTargetLanguage::OpenCL);
- const int32_t key_IdSpace = _IdSpace;
- const std::string key_var_name = "_" + std::to_string(_anonymous_frag_count++);
-
- // First check whether a tile with the same name exists
- IVectorTile *result = (*this)[key_var_name];
- assert(result == nullptr);
- if(result == nullptr)
- {
- std::unique_ptr<ClConstantTile> tile = std::make_unique<ClConstantTile>(in, dt);
- _frags[key_IdSpace][key_var_name].tile_object = std::move(tile);
- _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
-
- _frag_types[key_IdSpace][key_var_name].tile_type = RegistryTileType::Tile;
- _frag_types[key_IdSpace][key_var_name].tile_name = key_var_name;
- _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
- _frag_types[key_IdSpace][key_var_name].registry_level = _registry_level;
- }
-
- return (*this)[key_var_name];
- }
- /**
- * @brief Get the tile from the registry. This method searches the tile in the IdSpace provided by the user
- *
- * @param[in] name The name of the tile to retrieve
- * @param[in] IdSpace The IdSpace id where to search the tile
- *
- * @return IVectorTile* The tile
- */
- IVectorTile* get(const std::string& name, int32_t IdSpace)
- {
- const int32_t key_IdSpace = IdSpace;
- const std::string key_var_name = name;
-
- IVectorTile* result = nullptr;
- auto search_IdSpace = _frags.find(key_IdSpace);
- if(search_IdSpace != _frags.end())
- {
- auto search_tile = _frags[key_IdSpace].find(key_var_name);
- if(search_tile != _frags[key_IdSpace].end())
- {
- result = search_tile->second.tile_object.get();
- assert(result != nullptr);
- }
- }
-
- return result;
- }
- /**
- * @brief Get the tile from the registry. This method searches the tile in the IdSpace set with @p set_IdSpace()
- *
- * @param[in] name The name of the tile to retrieve
- *
- * @return IVectorTile* The tile
- */
- IVectorTile* operator[](const std::string& name)
- {
- return get(name, _IdSpace);
- }
- /**
- * @brief Check whether the tile exists in the IdSpace provided by the user
- *
- * @param[in] name Name of the tile to search for
- * @param[in] IdSpace The IdSpace id where to search the tile
- *
- * @return true if the tile exists
- * @return false if the tile does not exist
- */
- bool has_tile(const std::string& name, int32_t IdSpace) const
- {
- const int32_t key_IdSpace = IdSpace;
- const std::string key_var_name = name;
-
- // IVectorTile* result = nullptr;
- auto search_IdSpace = _frags.find(key_IdSpace);
-
- return search_IdSpace != _frags.end();
- }
- /**
- * @brief Check whether the tile within the current IdSpace exists
- *
- * @param[in] name Name of the tile to search for
- *
- * @return true if the tile exists
- * @return false if the tile does not exist
- */
- bool has_tile(const std::string& name) const
- {
- return has_tile(name, _IdSpace);
- }
- /**
- * @brief Get all the tiles declared within the IdSpace provided by the user
- *
- * @param[in] IdSpace IdSpace where to retrieve all the declared tiles
- *
- * @return std::vector<IVectorTile*> A vector with all the declared tiles in the IdSpace provided by the user
- */
- std::vector<IVectorTile*> tile_declarations(int32_t IdSpace)
- {
- std::vector<IVectorTile*> tiles;
-
- std::map<RegistryTileName, RegistryTileTypeTableEntry>::iterator it = _frag_types[IdSpace].begin();
-
- while (it != _frag_types[IdSpace].end())
- {
- // The following line should be enabled. However, we cannot enable it at this stage
- // because this method is currently also used to retrieve the output tile produced by each component,
- // even though it should NOT be used for that purpose.
- //if(it->second.tile_type == RegistryTileType::Tile)
- {
- tiles.push_back(get(it->second.tile_name, it->second.registry_idspace));
- }
- it++;
- }
-
- return tiles;
- }
- /**
- * @brief Increase the level of stack.
- *
- */
- void increment_registry_level()
- {
- _registry_level++;
- }
- /**
- * @brief Remove all the tiles declared at the current stack level and decrease the level of the stack.
- *
- */
- void decrement_registry_level()
- {
- assert(_registry_level >= 0);
-
- // Remove all variables in the local scope
- std::map<RegistryTileName, RegistryTileTableEntry>::iterator it = _frags[_IdSpace].begin();
-
- while (it != _frags[_IdSpace].end())
- {
- if (it->second.registry_level == _registry_level)
- {
- it = _frags[_IdSpace].erase(it);
- }
- else
- {
- it++;
- }
- }
-
- std::map<RegistryTileName, RegistryTileTypeTableEntry>::iterator it_type = _frag_types[_IdSpace].begin();
-
- while (it_type != _frag_types[_IdSpace].end())
- {
- if (it_type->second.registry_level == _registry_level)
- {
- it_type = _frag_types[_IdSpace].erase(it_type);
- }
- else
- {
- it_type++;
- }
- }
-
- _registry_level--;
- }
- /**
- * @brief Get the level of the stack
- *
- */
- int32_t level() const
- {
- return _registry_level;
- }
-
-private:
- // This method ensures that the key is unique among different components
- std::string generate_tile_name(const std::string& name)
- {
- assert(_IdSpace >= 0 );
- if(_registry_level == 0)
- {
- return "_G" + std::to_string(_IdSpace) + "_" + name;
- }
- else
- {
- return name;
- }
- }
- RegistryTileTable _frags {};
- RegistryTileTypeTable _frag_types {};
- RegistryLevel _registry_level { 0 };
- RegistryIdSpace _IdSpace { -1 };
- int32_t _anonymous_frag_count { 0 }; // Counter used to create the anonymous tiles
- GpuTargetLanguage _language { GpuTargetLanguage::Unknown }; // Gpu programming language
-};
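For illustration only (not part of the original header): a hypothetical sketch of how the registry above scopes tiles with an IdSpace and nested registry levels.

#include <cassert>

inline void tile_registry_example() // hypothetical name, for illustration
{
    GpuTileRegistry registry(GpuTargetLanguage::OpenCL);
    registry.set_IdSpace(1); // the IdSpace must be set before declaring tiles

    registry.insert("acc", TileInfo(DataType::Fp32, 4, 4)); // level 0: a global tile (internally "_G1_acc")

    registry.increment_registry_level();
    registry.insert("tmp", TileInfo(DataType::Fp32, 4, 1)); // local to level 1
    assert(registry["tmp"] != nullptr);

    registry.decrement_registry_level(); // pops every tile declared at level 1
    assert(registry["tmp"] == nullptr);
    assert(registry["acc"] != nullptr);  // global tiles survive
}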
-
-using TensorEntry = std::unique_ptr<IGpuTensorArgument>;
-
-/**
- * @brief Data structure that contains the tensors consumed by the components.
- * Since different components may use the same name as reference for a tensor, the registry adopts the IdSpace concept, an @p id to prevent name collisions
- * when declaring tensors among different components.
- *
- */
-class GpuTensorArgumentRegistry
-{
-public:
- /**
- * @brief Construct a new Gpu Tensor Registry object
- *
- */
- GpuTensorArgumentRegistry()
- {
- _language = GpuTargetLanguage::Unknown;
- }
- /**
- * @brief Construct a new Gpu Tensor Registry object
- *
- * @param[in] language Gpu programming language to use
- */
- GpuTensorArgumentRegistry(GpuTargetLanguage language)
- {
- _language = language;
- }
- /**
- * @brief Default destructor. Destroy the Gpu Tensor Registry object
- *
- */
- ~GpuTensorArgumentRegistry() = default;
- /**
- * @brief Set the working IdSpace for the tensor registry. IdSpace is used to prevent name collisions when declaring tensors.
- * Therefore, the IdSpace should be set before declaring any tensors.
- *
- * @param[in] id The IdSpace id
- */
- void set_IdSpace(int32_t id)
- {
- _IdSpace = id;
- }
- /**
- * @brief Get the current working IdSpace for the tensor registry. IdSpace is used to prevent name collisions when declaring tensors
- *
- * @return The IdSpace id
- */
- int32_t IdSpace() const
- {
- return _IdSpace;
- }
- /**
- * @brief Gets all the IdSpace declarations defined in the tensor registry.
- *
- * @return all the IdSpace declarations defined in the tensor registry as std::vector<int32_t>. It returns an empty vector if there are no IdSpace declarations.
- */
- std::vector<int32_t> IdSpace_declarations() const
- {
- std::vector<int32_t> x;
-
- auto it = _refs.begin();
-
- while (it != _refs.end())
- {
- x.push_back(it->first);
-
- it++;
- }
-
- return x;
- }
- /**
- * @brief Declare a tensor with TensorInfo. The tensor will be stored in the IdSpace set with @p set_IdSpace()
- *
- * @note The reference name used for declaring the tensor should not be previously used in the IdSpace
- *
- * @param[in] name Reference name for the tensor. The reference name can be used to retrieve the tensor stored in the registry.
- * @param[in] x Pair of tensor info and tensor id
- * @param[in] return_by_value_when_possible True if we want the value stored in the tensor components
- */
- void insert(const std::string& name, const TensorInfo& x, bool return_by_value_when_possible)
- {
- assert(_language == GpuTargetLanguage::OpenCL);
- const int32_t key_IdSpace = _IdSpace;
- const int32_t tensor_id = x.id;
- const std::string key_var_name = name;
- const std::string var_name = generate_tensor_name(name, tensor_id);
-
- // First, check whether the tensor has already a reference. If so, trigger an assert
- assert(!has_tensor_argument(name));
-
- // Check whether a tensor with that tensorID exists
- auto result = _tensor_arguments.find(tensor_id);
- if(result == _tensor_arguments.end())
- {
- // It means that we haven't added a tensor with that tensor_id yet. Create a IGpuTensorArgument before creating the reference
- std::unique_ptr<ClTensorArgument> arg = std::make_unique<ClTensorArgument>(var_name, x, return_by_value_when_possible);
- _tensor_arguments[tensor_id] = std::move(arg);
- }
-
- _refs[key_IdSpace][key_var_name] = tensor_id;
- }
- /**
- * @brief Get the tensor from the registry. This method searches the tensor in the IdSpace set with @p set_IdSpace()
- *
- * @param[in] name The name of the tensor to retrieve
- *
- * @return IGpuTensor* The tensor
- */
- IGpuTensorArgument* operator[](const std::string& name)
- {
- const int32_t key_IdSpace = _IdSpace;
- const std::string key_var_name = name;
-
- IGpuTensorArgument* result = nullptr;
- auto search_IdSpace = _refs.find(key_IdSpace);
- if(search_IdSpace != _refs.end())
- {
- auto search_tensor_id = _refs[key_IdSpace].find(key_var_name);
-
- if(search_tensor_id != _refs[key_IdSpace].end())
- {
- const int32_t tensor_id = search_tensor_id->second;
- auto search_tensor_argument = _tensor_arguments.find(tensor_id);
- if(search_tensor_argument != _tensor_arguments.end())
- {
- result = search_tensor_argument->second.get();
- }
- assert(result != nullptr);
- }
- }
-
- return result;
- }
- /**
- * @brief Get all the tensors declared in the IdSpace provided by the user
- *
- * @return std::vector<IGpuTensorArgument*> A vector with all the declared tensors
- */
- std::vector<IGpuTensorArgument*> tensor_argument_declarations()
- {
- std::vector<IGpuTensorArgument*> args;
-
- auto it = _tensor_arguments.begin();
-
- while (it != _tensor_arguments.end())
- {
- args.push_back(it->second.get());
- it++;
- }
-
- return args;
- }
- /**
- * @brief Check whether the tensor argument in the IdSpace set with @p set_IdSpace() exists
- *
- * @param[in] name Name of the tensor argument to search for
- *
- * @return true if the tensor argument exists
- * @return false if the tensor argument does not exist
- */
- bool has_tensor_argument(const std::string& name)
- {
- const int32_t key_IdSpace = _IdSpace;
- const std::string key_var_name = name;
-
- auto search_IdSpace = _refs.find(key_IdSpace);
-
- if(search_IdSpace != _refs.end())
- {
- auto search_tensor_id = _refs[key_IdSpace].find(key_var_name);
-
- return search_tensor_id != _refs[key_IdSpace].end();
- }
- else
- {
- return false;
- }
- }
- /**
- * @brief Check whether the tensor argument is in the IdSpace provided by the user
- *
- * @param[in] name Name of the tensor argument to search for
- * @param[in] IdSpace The IdSpace id where to search the tensor argument
- *
- * @return true if the tensor argument exists
- * @return false if the tensor argument does not exist
- */
- bool has_tensor_argument(const std::string& name, int32_t IdSpace)
- {
- const int32_t key_IdSpace = IdSpace;
- const std::string key_var_name = name;
-
- auto search_IdSpace = _refs.find(key_IdSpace);
-
- if(search_IdSpace != _refs.end())
- {
- auto search_tensor_id = _refs[key_IdSpace].find(key_var_name);
-
- return search_tensor_id != _refs[key_IdSpace].end();
- }
- else
- {
- return false;
- }
- }
-private:
- // This method ensures that the key is unique among different components
- std::string generate_tensor_name(const std::string& name, int32_t tensor_id)
- {
- assert(tensor_id >= 0 );
-
- return name + std::to_string(tensor_id);
- }
-
- std::map<int32_t, TensorEntry> _tensor_arguments {};
- std::map<int32_t, std::map<std::string, int32_t>> _refs {};
- int32_t _IdSpace { -1 };
- GpuTargetLanguage _language { GpuTargetLanguage::Unknown }; // Gpu programming language
-};
-
-enum class OpType : int32_t
-{
- Elementwise = 0x0000,
- Relational = 0x1000,
- Algebra = 0x2000
-};
-
-inline std::string to_string(AssignmentOp op)
-{
- switch(op)
- {
- case AssignmentOp::Decrement:
- return "-=";
- case AssignmentOp::Increment:
- return "+=";
- default:
- assert(false);
- return "";
- }
-}
-
-inline std::string to_string(BinaryOp op)
-{
- switch(op)
- {
- case BinaryOp::Add:
- return "+";
- case BinaryOp::Sub:
- return "-";
- case BinaryOp::Mul:
- return "*";
- case BinaryOp::Div:
- return "/";
- case BinaryOp::Mod:
- return "%";
- case BinaryOp::Equal:
- return "==";
- case BinaryOp::Less:
- return "<";
- case BinaryOp::LessEqual:
- return "<=";
- case BinaryOp::Greater:
- return ">";
- case BinaryOp::GreaterEqual:
- return ">=";
- case BinaryOp::LogicalAnd:
- return "&&";
- case BinaryOp::LogicalOr:
- return "||";
- case BinaryOp::LogicalNot:
- return "!";
- default:
- assert(false);
- return "";
- }
-}
-
-inline std::string binary_op_string(BinaryOp op)
-{
- switch(op)
- {
- case BinaryOp::Add:
- return "add";
- case BinaryOp::Sub:
- return "sub";
- case BinaryOp::Mul:
- return "mul";
- case BinaryOp::Div:
- return "div";
- case BinaryOp::Mod:
- return "mod";
- case BinaryOp::Equal:
- return "eq";
- case BinaryOp::Less:
- return "gt";
- case BinaryOp::LessEqual:
- return "gteq";
- case BinaryOp::Greater:
- return "lt";
- case BinaryOp::GreaterEqual:
- return "lte";
- default:
- assert(false);
- return "";
- }
-}
-
-enum class OperandType : int32_t
-{
- Unknown = 0x00000000,
- ScalarFp32 = 0x00001011, // Immediate scalar tile
- ScalarFp16 = 0x00001012, // Immediate scalar tile
- ScalarInt32 = 0x00001021, // Immediate scalar tile
- ScalarInt16 = 0x00001022, // Immediate scalar tile
- ScalarInt8 = 0x00001024, // Immediate scalar tile
- ScalarUInt32 = 0x00001031, // Immediate scalar tile
- ScalarUInt16 = 0x00001032, // Immediate scalar tile
- ScalarUInt8 = 0x00001034, // Immediate scalar tile
- ScalarBool = 0x00001041, // Immediate scalar tile
- ScalarTile = 0x00001050, // Scalar from a tile
- Tile = 0x00010000, // Tile
- TensorStride1 = 0x00100001, // Tensor component
- TensorStride2 = 0x00100002, // Tensor component
- TensorStride3 = 0x00100003, // Tensor component
- TensorStride4 = 0x00100004, // Tensor component
- TensorDim0 = 0x00100010, // Tensor component
- TensorDim1 = 0x00100020, // Tensor component
- TensorDim2 = 0x00100030, // Tensor component
- TensorDim3 = 0x00100040, // Tensor component
- TensorDim4 = 0x00100050, // Tensor component
- TensorC = 0x00100010, // Tensor component
- TensorW = 0x00100020, // Tensor component
- TensorH = 0x00100030, // Tensor component
- TensorD = 0x00100040, // Tensor component
- TensorN = 0x00100050, // Tensor component
- TensorDim1xDim2 = 0x00100100, // Tensor component
- TensorDim1xDim2xDim3 = 0x00100200, // Tensor component
- TensorWxH = 0x00100300, // Tensor component
- TensorWxHxD = 0x00100400, // Tensor component
- TensorDataOffset = 0x00100500, // Tensor component
-};
-
-struct ScalarTileCoord
-{
- ScalarTileCoord() {}
- ScalarTileCoord(int32_t x0, int32_t y0) : x(x0), y(y0) {}
- int32_t x { -1 };
- int32_t y { -1 };
-};
-/**
- * @brief Operand class. This object is used to pass the operands to the operations performed by the writer.
- * Operand can be of three types:
- * -# Scalar immediate: constant expression
- * -# Tile: A tile
- * -# Tensor component: A component (scalar) of a tensor
- *
- */
-class Operand
-{
-public:
- Operand(const std::string &val)
- {
- _str = val;
- _type = OperandType::Tile;
- }
-
- Operand(const std::string &val, const ScalarTileCoord& coord)
- {
- _str = val;
- _type = OperandType::ScalarTile;
- _coord = coord;
- }
-
- Operand(const std::string &val, OperandType type)
- {
- _str = val;
- _type = type;
- }
-
-    Operand(const Operand& t)
-    {
-        _str   = t.value();
-        _type  = t.type();
-        _coord = t.scalar_tile_coordinate();
-    }
-
- Operand& operator=(const Operand& t)
- {
- _str = t.value();
- _type = t.type();
- _coord = t.scalar_tile_coordinate();
- return *this;
- }
-
- std::string value() const
- {
- return _str;
- }
-
- OperandType type() const
- {
- return _type;
- }
-
- ScalarTileCoord scalar_tile_coordinate() const
- {
- return _coord;
- }
-
-private:
- std::string _str {};
- OperandType _type { OperandType::Unknown };
- ScalarTileCoord _coord {};
-};
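-
-// For illustration, the different operand kinds can be constructed as follows (all names and
-// values below are arbitrary examples, not part of the original prototype):
-//
-//   Operand tile("dst");                          // a tile registered with the name "dst"
-//   Operand imm("4", OperandType::ScalarInt32);   // an immediate scalar constant
-//   Operand dim("src", OperandType::TensorDim0);  // a tensor component of the tensor "src"
-//   Operand elem("lut", ScalarTileCoord(0, 1));   // the scalar element (0, 1) of the tile "lut"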
-
-enum class GpuSamplerTensorStorage : int32_t
-{
- Unknown = static_cast<int32_t>(GpuTensorStorage::Unknown),
- BufferUint8Ptr = static_cast<int32_t>(GpuTensorStorage::BufferUint8Ptr),
- Image2dReadOnly = static_cast<int32_t>(GpuTensorStorage::Image2dReadOnly),
- Image2dWriteOnly = static_cast<int32_t>(GpuTensorStorage::Image2dWriteOnly),
- Image3dReadOnly = static_cast<int32_t>(GpuTensorStorage::Image3dReadOnly),
-    Image3dWriteOnly = static_cast<int32_t>(GpuTensorStorage::Image3dWriteOnly),
-};
-
-struct GpuSampler
-{
- GpuSampler() = default;
- TensorSamplerFormat format { TensorSamplerFormat::Unknown };
- GpuSamplerTensorStorage storage { GpuSamplerTensorStorage::Unknown };
- TensorSamplerAddressModeX address_mode_x { TensorSamplerAddressModeX::Unknown };
- TensorSamplerAddressModeY address_mode_y { TensorSamplerAddressModeY::Unknown };
- TensorSamplerAddressModeZ address_mode_z { TensorSamplerAddressModeZ::Unknown };
-};
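-
-// For illustration, a sampler for a buffer-backed [C, W, H] tensor could be configured as follows
-// (the address modes chosen here are arbitrary examples):
-//
-//   GpuSampler sampler;
-//   sampler.format         = TensorSamplerFormat::C_W_H;
-//   sampler.storage        = GpuSamplerTensorStorage::BufferUint8Ptr;
-//   sampler.address_mode_x = TensorSamplerAddressModeX::None;
-//   sampler.address_mode_y = TensorSamplerAddressModeY::ClampToBorder;
-//   sampler.address_mode_z = TensorSamplerAddressModeZ::None;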
-
-inline GpuSampler create_simple_sampler(const TensorInfo* tensor_info_id, GpuSampler sampler, int32_t step_x, int32_t step_y, int32_t step_z)
-{
- CKW_UNUSED(step_x, step_y, step_z);
-
- auto tensor = tensor_info_id->shape;
-
- GpuSampler dst_sampler;
- dst_sampler.format = sampler.format;
- dst_sampler.storage = GpuSamplerTensorStorage::BufferUint8Ptr;
- dst_sampler.address_mode_x = sampler.address_mode_x;
- dst_sampler.address_mode_y = sampler.address_mode_y;
- dst_sampler.address_mode_z = sampler.address_mode_z;
-
- int32_t dim_x = 0;
- int32_t dim_y = 0;
- int32_t dim_z = 0;
-
- switch(sampler.format)
- {
- case TensorSamplerFormat::C_W_H:
- dim_x = tensor[0];
- dim_y = tensor[1];
- dim_z = tensor[2];
- break;
- case TensorSamplerFormat::C_WH_1:
- dim_x = tensor[0];
- dim_y = tensor[1] * tensor[2];
- dim_z = 1;
- break;
- default:
- std::cout << "Unsupported tensor format" << std::endl;
- assert(false);
- break;
- }
-
- if(dim_x == 1)
- {
- assert(step_x == 1);
- dst_sampler.address_mode_x = TensorSamplerAddressModeX::None;
- }
-
- if(dim_y == 1)
- {
- assert(step_y == 1);
- dst_sampler.address_mode_y = TensorSamplerAddressModeY::None;
- }
-
- if(dim_z == 1)
- {
- assert(step_z == 1);
- dst_sampler.address_mode_z = TensorSamplerAddressModeZ::None;
- }
-
- return dst_sampler;
-}
-
-class GpuOutputSampler
-{
-public:
- GpuOutputSampler() = default;
- /**
-     * @brief Method used to initialize the GpuOutputSampler. The GpuOutputSampler can be initialized only once
-     *        by the root component. Once initialized, all simpler components will need to use this sampler
-     *        or a broadcasted version of it.
-     *
-     * @param[in] tensor_info_id TensorInfo of the output tensor
-     * @param[in] tensor_storage Tensor storage to use for the output tensor
-     * @param[in] tensor_format  Tensor sampler format
-     * @param[in] step_x         Increment step in the X direction. It is not necessarily the same as the n0 of the tile!
-     * @param[in] step_y         Increment step in the Y direction. It is not necessarily the same as the m0 of the tile!
-     * @param[in] step_z         Increment step in the Z direction. It is not necessarily the same as the d0 of the tile!
- */
- void initialize(const TensorInfo *tensor_info_id, GpuSamplerTensorStorage tensor_storage, TensorSamplerFormat tensor_format, int32_t step_x, int32_t step_y, int32_t step_z)
- {
- assert(_is_initialized == false);
-
- _step_x = step_x;
- _step_y = step_y;
- _step_z = step_z;
- _tensor_info_id = tensor_info_id;
- _sampler = create_sampler(tensor_storage, tensor_format);
- _is_initialized = true;
- };
-
- GpuSampler sampler() const
- {
- return _sampler;
- };
-
- int32_t step_x() const
- {
- return _step_x;
- };
-
- int32_t step_y() const
- {
- return _step_y;
- };
-
- int32_t step_z() const
- {
- return _step_z;
- };
-private:
- GpuSampler create_sampler(GpuSamplerTensorStorage tensor_storage, TensorSamplerFormat tensor_format)
- {
-        // The output tensor can only be written to, therefore read-only storages are not allowed
- assert(tensor_storage != GpuSamplerTensorStorage::Image2dReadOnly);
- assert(tensor_storage != GpuSamplerTensorStorage::Image3dReadOnly);
-
- auto tensor = _tensor_info_id->shape;
-
- GpuSampler sampler;
- sampler.format = tensor_format;
- sampler.storage = tensor_storage;
- sampler.address_mode_x = TensorSamplerAddressModeX::None;
- sampler.address_mode_y = TensorSamplerAddressModeY::None;
- sampler.address_mode_z = TensorSamplerAddressModeZ::None;
-
- // In the case of texture, we do not need any special checks at the border
- if(tensor_storage == GpuSamplerTensorStorage::BufferUint8Ptr)
- {
- int32_t dim_x = 0;
- int32_t dim_y = 0;
- int32_t dim_z = 0;
-
- switch(tensor_format)
- {
- case TensorSamplerFormat::C_W_H:
- dim_x = tensor[0];
- dim_y = tensor[1];
- dim_z = tensor[2];
- break;
- case TensorSamplerFormat::C_WH_1:
- dim_x = tensor[0];
- dim_y = tensor[1] * tensor[2];
- dim_z = 1;
- break;
- default:
- std::cout << "Unsupported tensor format" << std::endl;
- assert(false);
- break;
- }
-
- if((dim_x % _step_x) != 0 && dim_x != 1)
- {
- sampler.address_mode_x = TensorSamplerAddressModeX::OverlappingMin;
- }
-
- if((dim_y % _step_y) != 0 && dim_y != 1)
- {
- sampler.address_mode_y = TensorSamplerAddressModeY::ClampToMaxEdgeOnly;
- }
-
- if((dim_z % _step_z) != 0 && dim_z != 1)
- {
- sampler.address_mode_z = TensorSamplerAddressModeZ::ClampToMaxEdgeOnly;
- }
- }
-
- return sampler;
- }
- GpuSampler _sampler { }; // GpuSampler
- int32_t _step_x { 1 };
- int32_t _step_y { 1 };
- int32_t _step_z { 1 };
- const TensorInfo* _tensor_info_id { nullptr };
- bool _is_initialized { false };
-};
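-
-// Minimal usage sketch (assuming dst_info is a valid TensorInfo* provided by the caller):
-//
-//   GpuOutputSampler output_sampler;
-//   output_sampler.initialize(dst_info, GpuSamplerTensorStorage::BufferUint8Ptr,
-//                             TensorSamplerFormat::C_WH_1, /* step_x */ 4, /* step_y */ 1, /* step_z */ 1);
-//
-// With these steps, create_sampler() switches the X address mode to OverlappingMin whenever the
-// first dimension is neither a multiple of 4 nor equal to 1.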
-
-/**
- * @brief Tensor operand class. This object is used to pass tensor operands to the operations performed by the writer.
- */
-class TensorOperand
-{
-public:
- TensorOperand(const std::string &val, GpuSampler sampler) : _str(val), _sampler(sampler)
- {
- }
-
- TensorOperand& operator=(const TensorOperand& t)
- {
- _str = t.value();
- _sampler = t.sampler();
- return *this;
- }
-
- std::string value() const
- {
- return _str;
- }
-
- GpuSampler sampler() const
- {
- return _sampler;
- }
-
-private:
- std::string _str {};
- GpuSampler _sampler {};
-};
-
-/**
- * @brief Data structure that contains all the necessary information to write the Gpu kernel with the Gpu kernel Writer
- * This data structure must be initialized before being passed to the Gpu Kernel Writer
- *
- */
-class GpuKernelWriterDataHolder
-{
-public:
- /**
- * @brief Construct a new Gpu Kernel Data object. In this phase, we should also store
- * the GPU target and target specific capabilities (extensions). For now, we just initialize the
- * programming language
- *
- * @param[in] language Gpu programming language to use
- */
- GpuKernelWriterDataHolder(GpuTargetLanguage language) : tiles(language), arguments(language), code(""), _language(language)
- {
- }
- /**
- * @brief Get the Gpu programming language used
- *
- * @return GpuTargetLanguage the Gpu programming language
- */
- GpuTargetLanguage programming_language() const
- {
- return _language;
- }
- /**
- * @brief @ref GpuTileRegistry
- *
- */
- GpuTileRegistry tiles{};
- /**
- * @brief @ref GpuTensorArgumentRegistry
- *
- */
- GpuTensorArgumentRegistry arguments{};
- /**
- * @brief @ref GpuOutputSampler.
- *
- */
- GpuOutputSampler output_sampler{};
- /**
- * @brief Source code
- *
- */
- std::string code{};
-
- // GpuExtensionRegistry extensions{};
-private:
- GpuTargetLanguage _language;
-};
-
-struct LWS
-{
- int32_t x {1};
- int32_t y {1};
- int32_t z {1};
-};
-
-/**
- * @brief Utility class used to get the tile from the operand. If the operand is not a tile, @ref OperandUnpacker
- *        declares an anonymous tile in the tile registry.
- */
-class OperandUnpacker
-{
-public:
- OperandUnpacker(GpuTileRegistry& tiles, GpuTensorArgumentRegistry& arguments) : _tiles(tiles), _arguments(arguments)
- {
- // Increase the level of the stack to allocate possible temporary tiles
- _tiles.increment_registry_level();
- };
-
- ~OperandUnpacker()
- {
- // Decrease the level of the stack to deallocate any temporary tiles
- _tiles.decrement_registry_level();
- }
-
- IVectorTile* unpack(const Operand& src)
- {
- // Get the tile
- if(src.type() == OperandType::Tile)
- {
- assert(_tiles.has_tile(src.value()));
- return _tiles[src.value()];
- }
- // Create an anonymous tile with a constant
- else if(static_cast<int32_t>(src.type()) & 0x00001000)
- {
- if(src.type() == OperandType::ScalarTile)
- {
- ScalarTileCoord coord = src.scalar_tile_coordinate();
- assert(_tiles.has_tile(src.value()));
- assert(coord.x >= 0);
- assert(coord.y >= 0);
- auto val = _tiles[src.value()]->scalar(coord.x, coord.y);
- return _tiles.insert({{{val.str}}}, val.type.dt);
- }
- else
- {
- return _tiles.insert({{{src.value()}}}, to_tile_data_type(src.type()));
- }
- }
- // Create an anonymous tile with the tensor component
- else
- {
- assert(_arguments.has_tensor_argument(src.value()));
- auto x = _arguments[src.value()];
- const std::string val = x->component(to_tensor_component(src.type()));
- const DataType dt = x->component_data_type();
- return _tiles.insert({{{val}}}, dt);
- }
- }
-
-private:
- DataType to_tile_data_type(OperandType x)
- {
- return static_cast<DataType>(static_cast<int32_t>(x) & 0x00ff);
- }
-
- TensorComponent to_tensor_component(OperandType x)
- {
- switch(x)
- {
- case OperandType::TensorDim0:
- return TensorComponent::Dim0;
- case OperandType::TensorDim1:
- return TensorComponent::Dim1;
- case OperandType::TensorDim2:
- return TensorComponent::Dim2;
- case OperandType::TensorDim3:
- return TensorComponent::Dim3;
- case OperandType::TensorDim4:
- return TensorComponent::Dim4;
- case OperandType::TensorStride1:
- return TensorComponent::Stride1;
- case OperandType::TensorStride2:
- return TensorComponent::Stride2;
- case OperandType::TensorStride3:
- return TensorComponent::Stride3;
- case OperandType::TensorStride4:
- return TensorComponent::Stride4;
- case OperandType::TensorDim1xDim2:
- return TensorComponent::Dim1xDim2;
- case OperandType::TensorDim1xDim2xDim3:
- return TensorComponent::Dim1xDim2xDim3;
- case OperandType::TensorDataOffset:
- return TensorComponent::OffsetFirstElement;
- default:
- assert(false);
- return TensorComponent::Unknown;
- }
- }
-
- GpuTileRegistry& _tiles;
- GpuTensorArgumentRegistry& _arguments;
-};
-
-/**
- * @brief Utility class used to get the tensor argument from the tensor operand. The tensor operand must refer to
- *        a tensor argument that has already been registered in the @ref GpuTensorArgumentRegistry.
- */
-class TensorOperandUnpacker
-{
-public:
- TensorOperandUnpacker(GpuTensorArgumentRegistry& arguments) : _arguments(arguments)
- {
- };
-
- IGpuTensorArgument* unpack(const TensorOperand& src)
- {
- assert(_arguments.has_tensor_argument(src.value()));
- return _arguments[src.value()];
- }
-
-private:
- GpuTensorArgumentRegistry& _arguments;
-};
-
-/**
- * @brief The GpuKernel will be used in three stages:
- * -# Compilation stage
- * -# Tuning stage
- * -# Dispatch stage
- */
-struct GpuKernel
-{
- // Compilation stage
- std::string code {}; // Source code, required for the compilation stage
- std::vector<GpuExtensions> list_extensions{}; // Extensions, required for the compilation stage
- // Tuning stage
- std::string config_id {}; // Unique id, required for the tuning stage
- std::vector<LWS> list_lws{}; // LWS to test, required for the tuning stage
- // Dispatch stage
- GpuOutputSampler output_sampler{}; // GpuOutputSampler, required for the dispatch stage
- std::vector<std::pair<int32_t, GpuTensorStorage>> list_tensor_storages; // List of tensor storages, required for the dispatch stage
-    std::vector<std::pair<int32_t, TensorComponent>>  list_tensor_components; // List of tensor components (width, stride, ...), required for the dispatch stage
-};
-
-// This function should produce an object with the source
-inline std::string generate_code(GpuKernelWriterDataHolder &in, const std::string& name)
-{
- std::string code;
- code += "__kernel void ";
- code += name;
- code += "(\n";
-
- auto IdSpaces = in.arguments.IdSpace_declarations();
-
- std::vector<std::string> arg_str;
-
- auto tensor_args = in.arguments.tensor_argument_declarations();
-
- for(auto &i : tensor_args)
- {
- // For each tensor used, get the storage and tensor components
- auto storages = i->storage_declarations();
- auto components = i->component_declarations();
-
- for(auto &y : storages)
- {
- std::string str;
- str += i->storage_type_declaration(y);
- str += " ";
- str += i->storage(y);
- arg_str.push_back(str);
- }
-
- for(auto &y : components)
- {
- std::string str;
- str += i->component_type_declaration();
- str += " ";
- str += i->component(y);
- arg_str.push_back(str);
- }
- }
-
- for(size_t i = 0; i < arg_str.size(); ++i)
- {
- code += arg_str[i];
- if(i + 1 < arg_str.size())
- {
- code += ",\n";
- }
- }
-
- code += ")\n";
- code += "{\n";
- code += in.code;
- code += "}\n";
-
- return code;
-}
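-
-// For illustration only: with a single tensor argument exposing one storage and one stride
-// component, the emitted source has roughly this shape (the actual argument names and types
-// depend on the declarations returned by the tensor argument):
-//
-//   __kernel void my_kernel(
-//   <storage type> <storage name>,
-//   <component type> <component name>)
-//   {
-//   ...contents of in.code...
-//   }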
-
-/**
- * @brief This class is responsible for mapping an N-dimensional tensor to a 3d tensor. The mapper needs the GpuSampler
- *        to know how to reduce the dimensionality of the tensor.
- *
- */
-class GpuTensor3dMapper
-{
-public:
- GpuTensor3dMapper(IGpuTensorArgument* tensor, GpuSampler sampler) : _sampler(sampler), _tensor(tensor)
- {
- };
-
- std::string tensor_component_x() const
- {
- const auto format = _sampler.format;
- switch(format)
- {
- case TensorSamplerFormat::C_WH_1:
- case TensorSamplerFormat::C_W_H:
- return _tensor->component(TensorComponent::C);
- default:
- std::cout << "Unsupported tensor format" << std::endl;
- assert(false);
- return "";
- }
- }
-
- std::string tensor_component_y() const
- {
- const auto format = _sampler.format;
- switch(format)
- {
- case TensorSamplerFormat::C_WH_1:
- return _tensor->component(TensorComponent::WxH);
- case TensorSamplerFormat::C_W_H:
- return _tensor->component(TensorComponent::W);
- default:
- std::cout << "Unsupported tensor format" << std::endl;
- assert(false);
- return "";
- }
- }
-
- std::string tensor_component_z() const
- {
- const auto format = _sampler.format;
- switch(format)
- {
- case TensorSamplerFormat::C_WH_1:
- return "1";
- case TensorSamplerFormat::C_W_H:
- return _tensor->component(TensorComponent::H);
- default:
- std::cout << "Unsupported tensor format" << std::endl;
- assert(false);
- return "";
- }
- }
-
- std::string tensor_component_stride_y() const
- {
- const auto format = _sampler.format;
- switch(format)
- {
- case TensorSamplerFormat::C_WH_1:
- case TensorSamplerFormat::C_W_H:
- return _tensor->component(TensorComponent::Stride1);
- default:
- std::cout << "Unsupported tensor format" << std::endl;
- assert(false);
- return "";
- }
- }
-
- std::string tensor_component_stride_z() const
- {
- const auto format = _sampler.format;
- switch(format)
- {
- case TensorSamplerFormat::C_WH_1:
- return "0";
- case TensorSamplerFormat::C_W_H:
- return _tensor->component(TensorComponent::Stride2);
- default:
- std::cout << "Unsupported tensor format" << std::endl;
- assert(false);
- return "";
- }
- }
-
- std::string tensor_component_stride_batch() const
- {
- const auto format = _sampler.format;
- switch(format)
- {
- case TensorSamplerFormat::C_WH_1:
- case TensorSamplerFormat::C_W_H:
- return _tensor->component(TensorComponent::Stride3);
- default:
- std::cout << "Unsupported tensor format" << std::endl;
- assert(false);
- return "";
- }
- }
-
- bool is_one_component_x() const
- {
- auto t = _tensor->format();
- const auto format = _sampler.format;
- switch(format)
- {
- case TensorSamplerFormat::C_WH_1:
- case TensorSamplerFormat::C_W_H:
- return t.shape[0] == 1;
- default:
- std::cout << "Unsupported tensor format" << std::endl;
- assert(false);
-                return false;
- }
- }
-
- bool is_one_component_y() const
- {
- auto t = _tensor->format();
- const auto format = _sampler.format;
- switch(format)
- {
- case TensorSamplerFormat::C_WH_1:
- return (t.shape[1] * t.shape[2]) == 1;
- case TensorSamplerFormat::C_W_H:
- return t.shape[1] == 1;
- default:
- std::cout << "Unsupported tensor format" << std::endl;
- assert(false);
-                return false;
- }
- }
-
- bool is_one_component_z() const
- {
- auto t = _tensor->format();
- const auto format = _sampler.format;
- switch(format)
- {
- case TensorSamplerFormat::C_WH_1:
- return true;
- case TensorSamplerFormat::C_W_H:
- return t.shape[2] == 1;
- default:
- std::cout << "Unsupported tensor format" << std::endl;
- assert(false);
-                return false;
- }
- }
-
- bool is_one_component_batch() const
- {
- auto t = _tensor->format();
- const auto format = _sampler.format;
- switch(format)
- {
- case TensorSamplerFormat::C_WH_1:
- case TensorSamplerFormat::C_W_H:
- return t.shape[3] == 1;
- default:
- std::cout << "Unsupported tensor format" << std::endl;
- assert(false);
-                return false;
- }
- }
-
- GpuSampler gpu_sampler() const
- {
- return _sampler;
- }
-
- IGpuTensorArgument* tensor_argument() const
- {
- return _tensor;
- }
-
-private:
- GpuSampler _sampler;
- IGpuTensorArgument* _tensor;
-};
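-
-// For illustration: with TensorSamplerFormat::C_WH_1 a [C, W, H, N] tensor is collapsed so that
-// x = C, y = W * H and z = 1, which is why tensor_component_y() returns the WxH component and
-// tensor_component_stride_z() returns "0". With TensorSamplerFormat::C_W_H the three dimensions
-// are kept as x = C, y = W, z = H.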
-
-struct GpuKernelWriterAttribute
-{
- bool return_tensor_component_by_value { false };
-};
-
-enum class ConvertPolicy
-{
- Wrap, /**< Wrap around */
- Saturate /**< Saturate */
-};
-
-enum class RoundingMode
-{
- None,
- Rte,
- Rtz,
- Rtp,
- Rtn
-};
-
-// https://llvm.org/docs/tutorial/MyFirstLanguageFrontend/LangImpl05.html
-class IGpuKernelWriter
-{
-public:
- virtual ~IGpuKernelWriter() = default;
- virtual void set_IdSpace(int32_t id) = 0;
- virtual void import_tile(const std::string& dst, const IVectorTile *src) = 0;
- virtual void declare_argument(const std::string& name, const TensorInfo& tensor) = 0;
- virtual void declare_tile(const std::string& name, const TileInfo& info) = 0;
- virtual void declare_const_tile(const std::string& name, const std::vector<std::vector<std::string>>& in, DataType dt) = 0;
- virtual void write_text(const std::string& x) = 0;
- virtual void compound_statement_begin() = 0;
- virtual void compound_statement_end() = 0;
-
- // Operations
- virtual void op_get_global_id(const Operand& dst_var, int32_t dim) = 0;
- virtual void op_get_global_coord(const Operand& dst, const Operand& step, const TensorOperand& tensor, int32_t dim) = 0;
- virtual void op_get_global_batch(const Operand& dst, const TensorOperand& tensor) = 0;
- virtual void op_get_global_size(const Operand& dst_var, int32_t dim) = 0;
- virtual void op_binary_expression(const Operand& dst, const Operand &lhs, BinaryOp op, const Operand &rhs) = 0;
- virtual void op_assign(const Operand& dst_name, const Operand& src_name) = 0;
- virtual void op_scalar_function(const Operand& dst_name, const Operand& src_name, ScalarUnaryFunction func) = 0;
- virtual void op_if(const Operand& lhs, BinaryOp op, const Operand& rhs) = 0;
- virtual void op_for_loop(const Operand& var_name, BinaryOp cond_op, const Operand& cond_value, AssignmentOp update_op, const Operand& update_value) = 0;
- virtual void op_load_indirect(const TensorOperand& tensor, const Operand& dst, const Operand& x, const Operand& y_indirect, const Operand& z, const Operand& b = Operand("0", OperandType::ScalarInt32)) = 0;
- virtual void op_load_immediate(const TensorOperand& tensor, const Operand& dst, const Operand& x, const Operand& y, const Operand& z, const Operand& b = Operand("0", OperandType::ScalarInt32), const Operand& dilation_y = Operand("1", OperandType::ScalarInt32)) = 0;
- virtual void op_store_immediate(const TensorOperand& tensor, const Operand& src, const Operand& x, const Operand& y, const Operand& z, const Operand& b = Operand("0", OperandType::ScalarInt32)) = 0;
- virtual void op_cast_expression(const Operand& dst, const Operand &src, ConvertPolicy policy) = 0;
- virtual void op_return() = 0;
- // virtual void op_else() = 0;
- // virtual void op_elseif() = 0;
-    // Utils
-    // Converts the (x, y) coordinates plus the (x_off, y_off) offsets into indirect (linear) buffer offsets,
-    // marking any out-of-bound element with -1
- virtual void util_get_indirect_buffer(const Operand& dst, const TensorOperand& tensor, const Operand& x, const Operand& y, const Operand& x_off, const Operand& y_off) = 0;
-};
-
-enum class GpuLoadStoreType
-{
- Load = 1,
- Store = 2
-};
-
-class IGpuLoadStoreHelperWriter
-{
-public:
- IGpuLoadStoreHelperWriter(IGpuKernelWriter *x, GpuTensor3dMapper mapper, GpuLoadStoreType type) : _writer(x), _mapper(mapper), _type(type) {}
- IGpuLoadStoreHelperWriter(const IGpuLoadStoreHelperWriter &) = default;
- IGpuLoadStoreHelperWriter &operator=(const IGpuLoadStoreHelperWriter &) = default;
- virtual ~IGpuLoadStoreHelperWriter() = default;
- virtual void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) = 0;
- virtual void write(const std::pair<int32_t, std::string>& y) = 0;
- virtual void finalize() = 0;
-protected:
- IGpuKernelWriter* _writer;
- GpuTensor3dMapper _mapper;
- GpuLoadStoreType _type;
-};
-
-class ClLoadStoreBufferHelperWriter : public IGpuLoadStoreHelperWriter
-{
-public:
- ClLoadStoreBufferHelperWriter(IGpuKernelWriter *x, const GpuTensor3dMapper& mapper, GpuLoadStoreType type) : IGpuLoadStoreHelperWriter(x, mapper, type)
- {
- }
-
- ClLoadStoreBufferHelperWriter(const ClLoadStoreBufferHelperWriter &) = default;
- ClLoadStoreBufferHelperWriter &operator=(const ClLoadStoreBufferHelperWriter &) = default;
-
- static bool validate(IGpuKernelWriter *x, GpuTensor3dMapper mapper, GpuLoadStoreType type, IVectorTile *dst)
- {
- CKW_UNUSED(x, type, dst);
-
- if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::BufferUint8Ptr)
- {
- return false;
- }
- return true;
- }
-
- void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) override
- {
- assert(validate(_writer, _mapper, _type, dst));
-
- _dst = dst;
- _ls_width_full = dst->format().w;
-
- _coord_x = x->scalar(0, 0).str;
- _coord_z = z->scalar(0, 0).str;
- _coord_b = b->scalar(0, 0).str;
- _coord_orig_z = _coord_z;
-
- out_of_bound_initialize_x(_coord_x);
- out_of_bound_initialize_z(_coord_z);
-
- /*
- meaning of else:
- - x: partial load/store
- - y: no load/store operation
- - z: no load/store operation
- if(x)
- {
- if(z)
- {
- if(y)
- {
- // full load/store width
- }
- else
- {
- // no load/store
- }
- }
- else
- {
- // no load/store
- }
- }
- else
- {
- if(z)
- {
- if(y)
- {
- // partial load/store width
- }
- else
- {
- // no load/store
- }
- }
- else
- {
- // no load/store
- }
- }
- */
- }
-
- void write(const std::pair<int32_t, std::string>& y) override
- {
- int32_t idx_y = y.first;
- std::string coord_y = y.second;
-
- // The only check required is on Y.
- out_of_bound_initialize_y(coord_y);
-
- const std::string dst = _dst->vector(idx_y).str;
- const std::string address = to_ls_buffer_address(_coord_x, coord_y, _coord_z, _coord_b);
- const std::string ls_buf = to_ls_buffer(_type, _ls_width_full, dst, address);
-
- _writer->write_text(ls_buf);
- _writer->write_text(";\n");
-
- out_of_bound_finalize_y(dst);
-
- // The left over load/store will be written in the finalize stage
- if(_ls_width_part.size() != 0)
- {
- int32_t w = 0;
- for(auto &p : _ls_width_part)
- {
- const std::string dst0 = _dst->vector(w, p, idx_y).str;
- const std::string coord_x = _coord_x + " + " + std::to_string(w);
- const std::string address = to_ls_buffer_address(coord_x, coord_y, _coord_z, _coord_b);
- const std::string ls_buf0 = to_ls_buffer(_type, p, dst0, address);
- _leftovers_x.push_back(std::make_pair(std::make_pair(dst0, coord_y), ls_buf0));
-
- w += p;
- }
- }
- }
-
- void finalize() override
- {
- out_of_bound_finalize_z();
- out_of_bound_finalize_x();
- }
-private:
- IVectorTile* _dst { nullptr };
- int32_t _ls_width_full { 0 };
- std::vector<int32_t> _ls_width_part { };
- std::vector<std::pair<std::pair<std::string, std::string>, std::string>> _leftovers_x {};
- std::string _coord_x {};
- std::string _coord_z {};
- std::string _coord_orig_z {};
- std::string _coord_b {};
-
- void out_of_bound_initialize_x(std::string& coord)
- {
- if(_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin)
- {
- auto tensor_format = _mapper.tensor_argument()->format();
- auto shape = tensor_format.shape;
-
- _ls_width_part = decompose_leftover_ls_vector_width(shape[0] % _ls_width_full);
- if(_ls_width_part.size() != 0)
- {
- _writer->write_text("if(" + coord + " > 0)\n");
- _writer->compound_statement_begin();
- }
- }
- };
-
- void out_of_bound_finalize_x()
- {
- if(_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin)
- {
- if(_ls_width_part.size() != 0)
- {
- _writer->compound_statement_end();
- _writer->write_text("else\n");
- _writer->compound_statement_begin();
-
- out_of_bound_initialize_z(_coord_orig_z);
- for(auto &i : _leftovers_x)
- {
- out_of_bound_initialize_y(i.first.second);
- _writer->write_text(i.second);
- _writer->write_text(";\n");
- out_of_bound_finalize_y(i.first.first);
- }
- out_of_bound_finalize_z();
- _writer->compound_statement_end();
- }
- }
- };
-
- void out_of_bound_initialize_y(std::string& coord)
- {
- std::string max = "";
-
- const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
-
- switch(address_mode_y)
- {
- case TensorSamplerAddressModeY::Skip:
- case TensorSamplerAddressModeY::ClampToBorder:
-                // NOTE: This line should not be moved outside of the switch statement.
-                // When we query the component, the component is marked as used and added to the list
-                // of arguments of the kernel. Since this component is not required in all cases,
-                // we should request it only when it is actually used.
- max = _mapper.tensor_component_y();
- _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n");
- _writer->compound_statement_begin();
- break;
- case TensorSamplerAddressModeY::SkipMinEdgeOnly:
- case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
- _writer->write_text("if(" + coord + " >= 0)\n");
- _writer->compound_statement_begin();
- break;
- case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
- case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
- max = _mapper.tensor_component_y();
- _writer->write_text("if(" + coord + " < " + max + ")\n");
- _writer->compound_statement_begin();
- break;
- case TensorSamplerAddressModeY::ClampToNearest:
- max = _mapper.tensor_component_y();
- coord = "clamp(" + coord + ", 0, " + max + " - 1)";
- break;
- case TensorSamplerAddressModeY::ClampToMaxEdgeOnly:
- max = _mapper.tensor_component_y();
- coord = "min(" + coord + ", " + max + " - 1)";
- break;
- case TensorSamplerAddressModeY::ClampToMinEdgeOnly:
- coord = "max(" + coord + ", 0)";
- break;
- case TensorSamplerAddressModeY::None:
- break;
- default:
-                std::cout << "Unsupported address mode for out_of_bound_initialize_y" << std::endl;
- assert(false);
- }
- };
-
- void out_of_bound_finalize_y(const std::string& dst)
- {
- const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
-
-        switch(address_mode_y)
-        {
-            case TensorSamplerAddressModeY::ClampToBorder:
-            case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
-            case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
-            case TensorSamplerAddressModeY::Skip:
-            case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
-            case TensorSamplerAddressModeY::SkipMinEdgeOnly:
-                _writer->compound_statement_end();
-                break;
-            case TensorSamplerAddressModeY::None:
-            case TensorSamplerAddressModeY::ClampToNearest:
-            case TensorSamplerAddressModeY::ClampToMaxEdgeOnly:
-            case TensorSamplerAddressModeY::ClampToMinEdgeOnly:
-                // No compound statement was opened for these modes
-                break;
-
-            default:
-                assert(false);
-        }
-
-        switch(address_mode_y)
-        {
-            case TensorSamplerAddressModeY::ClampToBorder:
-            case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
-            case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
-                _writer->write_text("else\n");
-                _writer->compound_statement_begin();
-                _writer->write_text(dst);
-                _writer->write_text(" = 0.0f;\n");
-                _writer->compound_statement_end();
-                break;
-            case TensorSamplerAddressModeY::None:
-            case TensorSamplerAddressModeY::ClampToNearest:
-            case TensorSamplerAddressModeY::ClampToMaxEdgeOnly:
-            case TensorSamplerAddressModeY::ClampToMinEdgeOnly:
-            case TensorSamplerAddressModeY::Skip:
-            case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
-            case TensorSamplerAddressModeY::SkipMinEdgeOnly:
-                // Only the ClampToBorder modes need to zero-fill the destination
-                break;
-
-            default:
-                assert(false);
-        }
- };
-
- void out_of_bound_initialize_z(std::string& coord)
- {
- std::string max = "";
-
- const auto address_mode_z = _mapper.gpu_sampler().address_mode_z;
-
- switch(address_mode_z)
- {
- case TensorSamplerAddressModeZ::Skip:
- max = _mapper.tensor_component_z();
- _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n");
- _writer->compound_statement_begin();
- break;
- case TensorSamplerAddressModeZ::SkipMinEdgeOnly:
- _writer->write_text("if(" + coord + " >= 0)\n");
- _writer->compound_statement_begin();
- break;
- case TensorSamplerAddressModeZ::SkipMaxEdgeOnly:
- max = _mapper.tensor_component_z();
- _writer->write_text("if(" + coord + " < " + max + ")\n");
- _writer->compound_statement_begin();
- break;
- case TensorSamplerAddressModeZ::ClampToNearest:
- max = _mapper.tensor_component_z();
- coord = "clamp(" + coord + ", 0, " + max + " - 1)";
- break;
- case TensorSamplerAddressModeZ::ClampToMaxEdgeOnly:
- max = _mapper.tensor_component_z();
- coord = "min(" + coord + ", " + max + " - 1)";
- break;
- case TensorSamplerAddressModeZ::ClampToMinEdgeOnly:
- coord = "max(" + coord + ", 0)";
- break;
- case TensorSamplerAddressModeZ::None:
- break;
- default:
-                std::cout << "Unsupported address mode for out_of_bound_initialize_z" << std::endl;
- assert(false);
- }
- };
-
- void out_of_bound_finalize_z()
- {
- const auto address_mode_z = _mapper.gpu_sampler().address_mode_z;
-
-        switch(address_mode_z)
-        {
-            case TensorSamplerAddressModeZ::Skip:
-            case TensorSamplerAddressModeZ::SkipMinEdgeOnly:
-            case TensorSamplerAddressModeZ::SkipMaxEdgeOnly:
-                _writer->compound_statement_end();
-                break;
-            case TensorSamplerAddressModeZ::None:
-            case TensorSamplerAddressModeZ::ClampToNearest:
-            case TensorSamplerAddressModeZ::ClampToMaxEdgeOnly:
-            case TensorSamplerAddressModeZ::ClampToMinEdgeOnly:
-                // No compound statement was opened for these modes
-                break;
-
-            default:
-                assert(false);
-        }
- };
-
- std::vector<int32_t> decompose_leftover_ls_vector_width(int32_t ls_leftover_vector_width) const
- {
- std::vector<int32_t> x;
-
- switch(ls_leftover_vector_width)
- {
- case 0:
- break;
- case 1:
- case 2:
- case 3:
- case 4:
- case 8:
- case 16:
- x.push_back(ls_leftover_vector_width);
- break;
- case 5:
- x.push_back(4);
- x.push_back(1);
- break;
- case 6:
- x.push_back(4);
- x.push_back(2);
- break;
- case 7:
- x.push_back(4);
- x.push_back(3);
- break;
- case 9:
- x.push_back(8);
- x.push_back(1);
- break;
- case 10:
- x.push_back(8);
- x.push_back(2);
- break;
- case 11:
- x.push_back(8);
- x.push_back(3);
- break;
- case 12:
- x.push_back(8);
- x.push_back(4);
- break;
- case 13:
- x.push_back(8);
- x.push_back(4);
- x.push_back(1);
- break;
- case 14:
- x.push_back(8);
- x.push_back(4);
- x.push_back(2);
- break;
- case 15:
- x.push_back(8);
- x.push_back(4);
- x.push_back(3);
- break;
-
- default:
- assert(false);
- }
- return x;
- }
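-
-    // For example, a leftover vector width of 13 is decomposed into {8, 4, 1} and a leftover of 6
-    // into {4, 2}, so that each partial load/store still uses a valid OpenCL vector width.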
-
- std::string to_ls_buffer(GpuLoadStoreType type, int32_t vector_width, const std::string& data, const std::string& address)
- {
- switch(type)
- {
- case GpuLoadStoreType::Load:
- if(vector_width != 1)
- {
- return data + " = vload" + std::to_string(vector_width) + "(0, " + address + ")";
- }
- else
- {
- return data + " = *(" + address + ")";
- }
- break;
- case GpuLoadStoreType::Store:
- if(vector_width != 1)
- {
- return "vstore" + std::to_string(vector_width) + "(" + data + ", 0, " + address + ")";
- }
- else
- {
- return "*(" + address + ") = " + data;
- }
- break;
- default:
- std::cout << "Unsupported GpuLoadStoreType" << std::endl;
- assert(false);
- return "";
- }
- }
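-
-    // For example, to_ls_buffer(GpuLoadStoreType::Load, 4, "tile_0", addr) produces
-    // "tile_0 = vload4(0, addr)", while to_ls_buffer(GpuLoadStoreType::Store, 1, "tile_0", addr)
-    // produces "*(addr) = tile_0" ("tile_0" and "addr" are illustrative names).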
-
- std::string to_ls_buffer_address(const std::string& x, const std::string& y, const std::string& z, const std::string& b) const
- {
- auto tensor_storage = static_cast<GpuTensorStorage>(_mapper.gpu_sampler().storage);
- assert(tensor_storage == GpuTensorStorage::BufferUint8Ptr);
- const std::string ptr_buf = _mapper.tensor_argument()->storage(tensor_storage);
- const std::string dst_type = get_cl_data_type(_dst->format().dt, 1);
-
- std::string address;
- address += "(__global ";
- address += dst_type;
- address += "*)(";
- address += ptr_buf;
- if(x != "0" && (_mapper.is_one_component_x() != true))
- {
- address += " + (";
- address += x + ") * sizeof(" + dst_type + ")";
- }
- if(y != "0" && (_mapper.is_one_component_y() != true))
- {
- const std::string stride_y = _mapper.tensor_component_stride_y();
- address += " + (";
- address += y + ")";
- address += " * ";
- address += stride_y;
- }
- if(z != "0" && (_mapper.is_one_component_z() != true))
- {
- const std::string stride_z = _mapper.tensor_component_stride_z();
- address += " + (";
- address += z + ")";
- address += " * ";
- address += stride_z;
- }
- if(b != "0" && (_mapper.is_one_component_batch() != true))
- {
- const std::string stride_b = _mapper.tensor_component_stride_batch();
- address += " + (";
- address += b + ")";
- address += " * ";
- address += stride_b;
- }
- address += ")";
- return address;
- }
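-
-    // For illustration, with x = "xi", y = "yi", z = "0", b = "0" and an fp32 destination tile,
-    // the generated address is roughly:
-    //   (__global float*)(tensor_ptr + (xi) * sizeof(float) + (yi) * stride_y)
-    // where tensor_ptr and stride_y stand for the storage and stride components returned by the
-    // mapper (illustrative names).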
-};
-
-class ClLoadStoreImage2dHelperWriter : public IGpuLoadStoreHelperWriter
-{
-public:
- static bool validate(IGpuKernelWriter *x, const GpuTensor3dMapper& mapper, GpuLoadStoreType type, IVectorTile *dst)
- {
- CKW_UNUSED(x);
-
- if(dst->format().w != 4)
- {
- return false;
- }
- if(mapper.gpu_sampler().address_mode_x != TensorSamplerAddressModeX::None)
- {
- return false;
- }
- if(mapper.gpu_sampler().address_mode_z != TensorSamplerAddressModeZ::None)
- {
- return false;
- }
- if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dReadOnly && type == GpuLoadStoreType::Load)
- {
- return false;
- }
- if(mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dWriteOnly && type == GpuLoadStoreType::Store)
- {
- return false;
- }
- if((dst->format().dt != DataType::Fp32) && (dst->format().dt != DataType::Fp16))
- {
- return false;
- }
- return true;
- /*
- - x: Only GpuSamplerAddressModeX::None is supported and vector length = 4
- - z: Only GpuSamplerAddressModeZ::None is supported
- */
- }
- ClLoadStoreImage2dHelperWriter(IGpuKernelWriter *x, const GpuTensor3dMapper& mapper, GpuLoadStoreType type) : IGpuLoadStoreHelperWriter(x, mapper, type)
- {
- }
-
- ClLoadStoreImage2dHelperWriter(const ClLoadStoreImage2dHelperWriter &) = default;
- ClLoadStoreImage2dHelperWriter &operator=(const ClLoadStoreImage2dHelperWriter &) = default;
-
- void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) override
- {
- assert(validate(_writer, _mapper, _type, dst));
-
- _dst = dst;
- _ls_width_full = dst->format().w;
- _coord_x = x->scalar(0, 0).str;
- _coord_z = z->scalar(0, 0).str;
- _coord_b = b->scalar(0, 0).str;
-
- /*
- if(y)
- {
- // full load/store width
- }
- else
- {
- // no load/store
- }
- */
- }
-
- void write(const std::pair<int32_t, std::string>& y) override
- {
- int32_t idx_y = y.first;
- std::string coord_y = y.second;
-
- // The only check required is on Y.
- out_of_bound_initialize_y(coord_y);
-
- const std::string dst = _dst->vector(idx_y).str;
- const std::string sampler = to_ls_image2d_sampler();
- const std::string coord = to_ls_image2d_coord(_coord_x, coord_y, _coord_z, _coord_b);
- const std::string ls_buf = to_ls_image2d(_type, _ls_width_full, dst, sampler, coord);
-
- _writer->write_text(ls_buf);
- _writer->write_text(";\n");
-
- out_of_bound_finalize_y(dst);
- }
-
- void finalize() override
- {
- }
-private:
- IVectorTile* _dst { nullptr };
- int32_t _ls_width_full { 0 };
- std::string _coord_x {};
- std::string _coord_z {};
- std::string _coord_b {};
-
- void out_of_bound_initialize_y(std::string& coord)
- {
- std::string max = "";
-
- const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
-
- switch(address_mode_y)
- {
- case TensorSamplerAddressModeY::Skip:
- max = _mapper.tensor_component_y();
- _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n");
- _writer->compound_statement_begin();
- break;
- case TensorSamplerAddressModeY::SkipMinEdgeOnly:
- _writer->write_text("if(" + coord + " >= 0)\n");
- _writer->compound_statement_begin();
- break;
- case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
- max = _mapper.tensor_component_y();
- _writer->write_text("if(" + coord + " < " + max + ")\n");
- _writer->compound_statement_begin();
- break;
- case TensorSamplerAddressModeY::ClampToBorder:
- case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
- case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
- case TensorSamplerAddressModeY::ClampToNearest:
- case TensorSamplerAddressModeY::ClampToMaxEdgeOnly:
- case TensorSamplerAddressModeY::ClampToMinEdgeOnly:
- case TensorSamplerAddressModeY::None:
- break;
- default:
-                std::cout << "Unsupported address mode for out_of_bound_initialize_y" << std::endl;
- assert(false);
- }
- };
-
- void out_of_bound_finalize_y(const std::string& dst)
- {
- CKW_UNUSED(dst);
-
- const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
-
-        switch(address_mode_y)
-        {
-            case TensorSamplerAddressModeY::Skip:
-            case TensorSamplerAddressModeY::SkipMinEdgeOnly:
-            case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
-                _writer->compound_statement_end();
-                break;
-            case TensorSamplerAddressModeY::ClampToBorder:
-            case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
-            case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
-            case TensorSamplerAddressModeY::ClampToNearest:
-            case TensorSamplerAddressModeY::ClampToMaxEdgeOnly:
-            case TensorSamplerAddressModeY::ClampToMinEdgeOnly:
-            case TensorSamplerAddressModeY::None:
-                // The border handling is done by the image sampler itself
-                break;
-
-            default:
-                assert(false);
-        }
- };
-
- std::string to_ls_image2d(GpuLoadStoreType type, int32_t vector_width, const std::string& data, const std::string& sampler, const std::string& coord)
- {
- CKW_UNUSED(vector_width);
-
- auto tensor_storage = static_cast<GpuTensorStorage>(_mapper.gpu_sampler().storage);
- const std::string image2d_obj = _mapper.tensor_argument()->storage(tensor_storage);
- // const DataType dt = _dst->format().dt;
- const std::string post_fix = _dst->format().dt == DataType::Fp32? "f" : "h";
-
- switch(type)
- {
- case GpuLoadStoreType::Load:
- return data + " = read_image" + post_fix + "(" + image2d_obj + ", " + sampler + ", " + coord + ")";
- break;
- case GpuLoadStoreType::Store:
- return "write_image" + post_fix + "(" + image2d_obj + ", " + coord + ", " + data + ")";
-            default:
-                std::cout << "Unsupported GpuLoadStoreType" << std::endl;
-                assert(false);
-                return "";
- }
- }
-
- std::string to_ls_image2d_sampler() const
- {
- const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
-
- switch(address_mode_y)
- {
- case TensorSamplerAddressModeY::None:
- return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST";
- case TensorSamplerAddressModeY::Skip:
- case TensorSamplerAddressModeY::SkipMinEdgeOnly:
- case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
- case TensorSamplerAddressModeY::ClampToBorder:
- case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
- case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
- return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST";
- case TensorSamplerAddressModeY::ClampToNearest:
- case TensorSamplerAddressModeY::ClampToMaxEdgeOnly:
- case TensorSamplerAddressModeY::ClampToMinEdgeOnly:
- return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST";
- default:
- std::cout << "Unsupported address_mode_coord" << std::endl;
- assert(false);
- return "";
- }
- }
-
- std::string to_ls_image2d_coord(const std::string& x, const std::string& y, const std::string& z, const std::string& b) const
- {
- std::string coord_x = "(" + x + ") >> 2";
- std::string coord_y = "(";
-
- if(y != "0" && (_mapper.is_one_component_y() != true))
- {
- coord_y += y;
- }
- if(z != "0" && (_mapper.is_one_component_z() != true))
- {
- const std::string dim = _mapper.tensor_component_y();
- coord_y += " + (";
- coord_y += z + ")";
- coord_y += " * ";
- coord_y += dim;
- }
- if(b != "0" && (_mapper.is_one_component_batch() != true))
- {
- const std::string dim0 = _mapper.tensor_component_y();
- const std::string dim1 = _mapper.tensor_component_z();
- coord_y += " + (";
- coord_y += b + ")";
- coord_y += " * ";
- coord_y += dim0;
- coord_y += " * ";
- coord_y += dim1;
- }
- coord_y += ")";
- return "(int2)(" + coord_x + ", " + coord_y + ")";
- }
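-
-    // For example, with format C_WH_1, x = "xi", y = "yi", z = "0" and b = "0" the returned
-    // coordinate is "(int2)((xi) >> 2, (yi))": the X coordinate is divided by 4 because each
-    // image texel packs 4 elements.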
-};
-
-/** IGpuLoadStoreHelperWriter factory class */
-class ClLoadStoreHelperWriterFactory final
-{
-public:
-    /** Static method to create the IGpuLoadStoreHelperWriter subclass that matches the tensor storage set in the mapper
-     *
-     * @param[in] x      Gpu kernel writer used to emit the generated code
-     * @param[in] mapper Mapper from the N-dimensional tensor to the 3d tensor
-     * @param[in] type   Whether the helper writes a load or a store
-     *
-     * @return IGpuLoadStoreHelperWriter
-     */
- static std::unique_ptr<IGpuLoadStoreHelperWriter> create(IGpuKernelWriter *x, const GpuTensor3dMapper& mapper, GpuLoadStoreType type)
- {
- const auto tensor_storage = mapper.gpu_sampler().storage;
- switch(tensor_storage)
- {
- case GpuSamplerTensorStorage::BufferUint8Ptr:
- return std::make_unique<ClLoadStoreBufferHelperWriter>(x, mapper, type);
- case GpuSamplerTensorStorage::Image2dReadOnly:
- case GpuSamplerTensorStorage::Image2dWriteOnly:
- return std::make_unique<ClLoadStoreImage2dHelperWriter>(x, mapper, type);
- default:
- std::cout << "Unsupported Gpu tensor storage" << std::endl;
- assert(false);
- return nullptr;
- }
- }
-};
-
-// This utility method needs to go in utils.h
-inline bool is_tile_scalar(IVectorTile* x)
-{
- return x->format().w == 1 && x->format().h == 1;
-}
-
-class ClKernelWriter : public IGpuKernelWriter
-{
-public:
- ClKernelWriter(GpuKernelWriterAttribute *attr, GpuKernelWriterDataHolder *x)
- {
- _data = x;
- _attr = attr;
- }
-
- ClKernelWriter(const ClKernelWriter &) = default;
- ClKernelWriter &operator=(const ClKernelWriter &) = default;
-
-    // An IdSpace ID is a term used to describe a fragment that is registered in the code to ensure
-    // there are no conflicts or ambiguities in the code
- void set_IdSpace(int32_t id) override
- {
- _data->tiles.set_IdSpace(id);
- _data->arguments.set_IdSpace(id);
- }
-
- void import_tile(const std::string& dst_name, const IVectorTile *src) override
- {
- _data->tiles.insert(dst_name, src);
- }
-
- void declare_argument(const std::string& name, const TensorInfo& tensor) override
- {
- assert(_data->arguments[name] == nullptr);
- _data->arguments.insert(name, tensor, _attr->return_tensor_component_by_value);
- }
-
- void declare_tile(const std::string& name, const TileInfo& format) override
- {
- assert(_data->tiles[name] == nullptr);
- _data->tiles.insert(name, format);
-
- IVectorTile *x = _data->tiles[name];
-
- for(auto &t : x->underlying_source_variables())
- {
- _data->code += t.type.str + " " + t.str + ";\n";
- }
- }
-
- void declare_const_tile(const std::string& name, const std::vector<std::vector<std::string>>& in, DataType dt) override
- {
- assert(_data->tiles[name] == nullptr);
- _data->tiles.insert(name, in, dt);
- // Note: A constant does not need to be declared in the code
- }
-
- void write_text(const std::string& x) override
- {
- _data->code += x;
- }
-
- void compound_statement_begin() override
- {
- _data->tiles.increment_registry_level();
- _data->code += "{\n";
- }
-
- void compound_statement_end() override
- {
- _data->tiles.decrement_registry_level();
- _data->code += "}\n";
- }
-
- void op_get_global_id(const Operand& dst_var, int32_t dim) override
- {
- assert(dst_var.type() == OperandType::Tile);
- assert(_data->tiles.has_tile(dst_var.value()));
- assert(_data->tiles[dst_var.value()]->format().w == 1 &&
- _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable
-
- auto var = _data->tiles[dst_var.value()];
-
- _data->code += var->scalar(0, 0).str;
- _data->code += " = get_global_id(";
- _data->code += std::to_string(dim);
- _data->code += ");\n";
- };
-
- void op_get_global_coord(const Operand& o_dst, const Operand& o_step, const TensorOperand& o_tensor, int32_t dim) override
- {
- OperandUnpacker operands(_data->tiles, _data->arguments);
- auto dst = operands.unpack(o_dst);
- auto step = operands.unpack(o_step);
-
- // Validation: Check that x, y and z are scalar
-
- TensorOperandUnpacker tensor_operands(_data->arguments);
- auto tensor = tensor_operands.unpack(o_tensor);
- auto gpu_sampler = o_tensor.sampler();
-
- GpuTensor3dMapper mapper(tensor, gpu_sampler);
-
- switch (dim)
- {
- case 0:
- if(mapper.is_one_component_x())
- {
- _data->code += dst->scalar(0, 0).str;
- _data->code += " = 0;\n";
- }
- else
- {
- if(mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin)
- {
- // Validation: Check: fixed tensor shape
- // TO BE CHANGED
- _data->code += dst->scalar(0, 0).str;
- _data->code += " = get_global_id(0) * ";
- _data->code += step->scalar(0, 0).str;
- _data->code += ";\n";
- }
- else
- {
- _data->code += dst->scalar(0, 0).str;
- _data->code += " = get_global_id(0) * ";
- _data->code += step->scalar(0, 0).str;
- _data->code += ";\n";
- }
- }
- break;
- case 1:
- if(mapper.is_one_component_y())
- {
- _data->code += dst->scalar(0, 0).str;
- _data->code += " = 0;\n";
- }
- else
- {
-                    if(mapper.gpu_sampler().address_mode_y == TensorSamplerAddressModeY::OverlappingMin)
-                    {
-                        // TO BE CHANGED: same handling as the X dimension for now
-                        _data->code += dst->scalar(0, 0).str;
-                        _data->code += " = get_global_id(1) * ";
-                        _data->code += step->scalar(0, 0).str;
-                        _data->code += ";\n";
-                    }
- else
- {
- _data->code += dst->scalar(0, 0).str;
- _data->code += " = get_global_id(1) * ";
- _data->code += step->scalar(0, 0).str;
- _data->code += ";\n";
- }
- }
- break;
- case 2:
- if(mapper.is_one_component_z())
- {
- _data->code += dst->scalar(0, 0).str;
- _data->code += " = 0;\n";
- }
- else
- {
- _data->code += dst->scalar(0, 0).str;
- _data->code += " = get_global_id(2) * ";
- _data->code += step->scalar(0, 0).str;
- _data->code += ";\n";
- }
- break;
- default:
- break;
- }
- };
-
- void op_get_global_batch(const Operand& o_dst, const TensorOperand& o_tensor) override
- {
- OperandUnpacker operands(_data->tiles, _data->arguments);
- auto dst = operands.unpack(o_dst);
-
- TensorOperandUnpacker tensor_operands(_data->arguments);
- auto tensor = tensor_operands.unpack(o_tensor);
- auto gpu_sampler = o_tensor.sampler();
-
- GpuTensor3dMapper mapper(tensor, gpu_sampler);
-
- if(mapper.is_one_component_batch())
- {
- _data->code += dst->scalar(0, 0).str;
- _data->code += " = 0;\n";
- }
- else
- {
- std::cout << "Unsupported batched computation" << std::endl;
- assert(false);
- }
- };
-
- void op_get_global_size(const Operand& dst_var, int32_t dim) override
- {
- assert(dst_var.type() == OperandType::Tile);
- assert(_data->tiles.has_tile(dst_var.value()));
- assert(_data->tiles[dst_var.value()]->format().w == 1 &&
- _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable
-
- auto var = _data->tiles[dst_var.value()];
-
- _data->code += var->scalar(0, 0).str;
- _data->code += " = get_global_size(";
- _data->code += std::to_string(dim);
- _data->code += ");\n";
- }
-
- void op_binary_expression(const Operand& dst_name, const Operand& lhs_name, BinaryOp op, const Operand& rhs_name) override
- {
- OperandUnpacker operands(_data->tiles, _data->arguments);
- auto lhs = operands.unpack(lhs_name);
- auto rhs = operands.unpack(rhs_name);
- auto dst = operands.unpack(dst_name);
-
- const int32_t dst_w = dst->format().w;
- const int32_t dst_h = dst->format().h;
- assert(lhs != nullptr);
- const int32_t lhs_w = lhs->format().w;
- const int32_t rhs_w = rhs->format().w;
-
- if(op == BinaryOp::MatMul_Nt_T)
- {
- assert((dst->format().dt == DataType::Fp32) || (dst->format().dt == DataType::Fp16));
- for(int32_t y = 0; y < dst_h; ++y)
- {
- for(int32_t x = 0; x < dst_w; ++x)
- {
- for(int32_t k = 0; k < lhs_w; ++k)
- {
- _data->code += dst->scalar(x, y).str;
- _data->code += " = fma(";
- _data->code += lhs->scalar(k, y).str;
- _data->code += ", ";
- _data->code += rhs->scalar(k, x).str;
- _data->code += ", ";
- _data->code += dst->scalar(x, y).str;
- _data->code += ");\n";
- }
- }
- }
-
- return;
- }
-
- bool broadcast_lhs_x = dst_w != 1 && lhs_w == 1;
- bool broadcast_rhs_x = dst_w != 1 && rhs_w == 1;
-
- std::string lhs_prefix = broadcast_lhs_x? "(" + dst->underlying_source_variables()[0].type.str + ")" : "";
- std::string rhs_prefix = broadcast_rhs_x? "(" + dst->underlying_source_variables()[0].type.str + ")" : "";
- std::string op_str = to_string(op);
-
- // Broadcasting on Y is automatic
- for(int32_t y = 0; y < dst_h; ++y)
- {
- _data->code += dst->vector(y).str;
- _data->code += " = ";
- _data->code += lhs_prefix + lhs->vector(y).str;
- _data->code += " ";
- _data->code += op_str;
- _data->code += " ";
- _data->code += rhs_prefix + rhs->vector(y).str;
- _data->code += ";\n";
- }
- };
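-
-    // For illustration, for a destination tile with 4 columns and a single-column lhs, each row y
-    // expands to "<dst row y> = (float4)<lhs row y> + <rhs row y>;": the "(float4)" prefix
-    // implements the broadcast on X, while the broadcast on Y is handled by the tile itself.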
-
- void op_cast_expression(const Operand& o_dst, const Operand &o_src, ConvertPolicy policy) override
- {
- CKW_UNUSED(policy);
-
- OperandUnpacker operands(_data->tiles, _data->arguments);
- auto src = operands.unpack(o_src);
- auto dst = operands.unpack(o_dst);
-
- // const int32_t dst_w = dst->format().w;
- const int32_t dst_h = dst->format().h;
- const std::string dt = dst->scalar(0, 0).type.str;
-
- // Broadcasting on Y is automatic
- for(int32_t y = 0; y < dst_h; ++y)
- {
- _data->code += dst->vector(y).str;
- _data->code += " = convert_" + dt + "(";
- _data->code += src->vector(y).str;
- _data->code += ");\n";
- }
- };
-
- void op_assign(const Operand& dst_name, const Operand& src_name) override
- {
- OperandUnpacker operands(_data->tiles, _data->arguments);
- auto src = operands.unpack(src_name);
- auto dst = operands.unpack(dst_name);
-
- const int32_t dst_w = dst->format().w;
- const int32_t dst_h = dst->format().h;
- const int32_t src_w = src->format().w;
- // const int32_t src_h = src->format().h;
- const std::string dt = dst->scalar(0, 0).type.str;
-
- bool broadcast_src_x = dst_w != 1 && src_w == 1;
-
- std::string src_prefix = broadcast_src_x? "(" + dt + ")" : "";
-
- // Broadcasting on Y is automatic
- for(int32_t y = 0; y < dst_h; ++y)
- {
- _data->code += dst->vector(y).str;
- _data->code += " = ";
- _data->code += src_prefix + src->vector(y).str;
- _data->code += ";\n";
- }
- }
-
- void op_scalar_function(const Operand& dst_name, const Operand& src_name, ScalarUnaryFunction func) override
- {
- OperandUnpacker operands(_data->tiles, _data->arguments);
- auto src = operands.unpack(src_name);
- auto dst = operands.unpack(dst_name);
-
- const int32_t dst_w = dst->format().w;
- const int32_t dst_h = dst->format().h;
- const int32_t src_w = src->format().w;
- // const int32_t src_h = src->format().h;
- const std::string dt = dst->scalar(0, 0).type.str;
-
- bool broadcast_src_x = dst_w != 1 && src_w == 1;
-
- std::string src_prefix = broadcast_src_x? "(" + dt + ")" : "";
-
- // Broadcasting on Y is automatic
- for(int32_t y = 0; y < dst_h; ++y)
- {
- _data->code += dst->vector(y).str;
- _data->code += " = ";
-
- switch(func)
- {
- case ScalarUnaryFunction::Exp:
- _data->code += "exp(";
- break;
-
- default:
- CKW_ASSERT(false);
- }
-
- _data->code += src_prefix + src->vector(y).str;
- _data->code += ");\n";
- }
- }
-
- void op_if(const Operand& o_lhs, BinaryOp op, const Operand& o_rhs) override
- {
- OperandUnpacker operands(_data->tiles, _data->arguments);
- auto lhs = operands.unpack(o_lhs);
- auto rhs = operands.unpack(o_rhs);
-
- assert(is_tile_scalar(lhs));
- assert(is_tile_scalar(rhs));
-
- _data->code += "if(";
- _data->code += lhs->scalar(0, 0).str;
- _data->code += " ";
- _data->code += to_string(op);
- _data->code += " ";
- _data->code += rhs->scalar(0, 0).str;
- _data->code += ")\n";
- }
-
- void op_for_loop(const Operand& var_name, BinaryOp cond_op, const Operand& cond_value_name, AssignmentOp update_op, const Operand& update_value_name) override
- {
- OperandUnpacker operands(_data->tiles, _data->arguments);
- auto var = operands.unpack(var_name);
- auto cond_value = operands.unpack(cond_value_name);
- auto update_value = operands.unpack(update_value_name);
-
- const int32_t dst_w = var->format().w;
- const int32_t dst_h = var->format().h;
-
- // It must be a scalar variable
- CKW_UNUSED(dst_w, dst_h);
- assert(dst_w == 1);
- assert(dst_h == 1);
-
- _data->code += "for(; " ;
- _data->code += var->scalar(0, 0).str;
- _data->code += " ";
- _data->code += to_string(cond_op);
- _data->code += " " + cond_value->scalar(0, 0).str + "; ";
- _data->code += var->scalar(0, 0).str;
- _data->code += " ";
- _data->code += to_string(update_op);
- _data->code += " " + update_value->scalar(0, 0).str + ")";
- _data->code += "\n";
- }
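-
-    // For example, with var = "i", cond_op = BinaryOp::Less, cond_value = "10",
-    // update_op = AssignmentOp::Increment and update_value = "1", the emitted text is
-    // "for(; i < 10; i += 1)" followed by the compound statement written by the caller.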
-
- void op_load_immediate(const TensorOperand& o_tensor, const Operand& o_dst, const Operand& o_x, const Operand& o_y, const Operand& o_z, const Operand& o_batch_idx, const Operand& dilation_y) override
- {
- OperandUnpacker operands(_data->tiles, _data->arguments);
- auto dst = operands.unpack(o_dst);
- auto x = operands.unpack(o_x);
- auto y = operands.unpack(o_y);
- auto z = operands.unpack(o_z);
- auto dil_y = operands.unpack(dilation_y);
- auto b = operands.unpack(o_batch_idx);
-
- TensorOperandUnpacker tensor_operands(_data->arguments);
- auto tensor = tensor_operands.unpack(o_tensor);
- auto gpu_sampler = o_tensor.sampler();
-
- GpuTensor3dMapper mapper(tensor, gpu_sampler);
-
- auto load_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Load);
-
- // Initialize the constant part
- load_writer->initialize(dst, x, z, b);
-
- for(int i = 0; i < dst->format().h; ++i)
- {
- std::string coord_y = y->scalar(0, 0).str + " + " + std::to_string(i);
- if(dil_y->scalar(0, 0).str != "1")
- {
- coord_y += " * " + dil_y->scalar(0, 0).str;
- }
- load_writer->write(std::make_pair(i, coord_y));
- }
-
- load_writer->finalize();
- }
-
- void op_load_indirect(const TensorOperand& o_tensor, const Operand& o_dst, const Operand& o_x, const Operand& o_indirect_h, const Operand& o_z, const Operand& o_batch_idx) override
- {
- OperandUnpacker operands(_data->tiles, _data->arguments);
- auto dst = operands.unpack(o_dst);
- auto x = operands.unpack(o_x);
- auto y_ind = operands.unpack(o_indirect_h);
- auto z = operands.unpack(o_z);
- auto b = operands.unpack(o_batch_idx);
-
- TensorOperandUnpacker tensor_operands(_data->arguments);
- auto tensor = tensor_operands.unpack(o_tensor);
- auto gpu_sampler = o_tensor.sampler();
-
- GpuTensor3dMapper mapper(tensor, gpu_sampler);
-
- auto load_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Load);
-
- // Initialize the constant part
- load_writer->initialize(dst, x, z, b);
-
- for(int i = 0; i < dst->format().h; ++i)
- {
- load_writer->write(std::make_pair(i, y_ind->scalar(0, i).str));
- }
-
- load_writer->finalize();
- }
-
- void op_store_immediate(const TensorOperand& tensor_name, const Operand& src_name, const Operand& x_name, const Operand& y_name, const Operand& z_name, const Operand& batch_index_name) override
- {
- OperandUnpacker operands(_data->tiles, _data->arguments);
- auto src = operands.unpack(src_name);
- auto x = operands.unpack(x_name);
- auto y = operands.unpack(y_name);
- auto z = operands.unpack(z_name);
- auto b = operands.unpack(batch_index_name);
-
- TensorOperandUnpacker tensor_operands(_data->arguments);
- auto tensor = tensor_operands.unpack(tensor_name);
- auto gpu_sampler = tensor_name.sampler();
-
- GpuTensor3dMapper mapper(tensor, gpu_sampler);
-
- auto store_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Store);
-
- // Initialize the constant part
- store_writer->initialize(src, x, z, b);
-
- int32_t tile_h = src->format().h;
-
- for(int m0 = tile_h - 1; m0 >= 0; m0--)
- {
- store_writer->write(std::make_pair(m0, y->scalar(0, 0).str + " + " + std::to_string(m0)));
- }
-
- store_writer->finalize();
- }
-
- void op_return() override
- {
- _data->code += "return;\n";
- }
-
- void util_get_indirect_buffer(const Operand& o_dst, const TensorOperand& o_tensor, const Operand& o_x, const Operand& o_y, const Operand& o_x_off, const Operand& o_y_off) override
- {
- OperandUnpacker operands(_data->tiles, _data->arguments);
- auto dst = operands.unpack(o_dst);
- auto x = operands.unpack(o_x);
- auto y = operands.unpack(o_y);
- auto x_off = operands.unpack(o_x_off);
- auto y_off = operands.unpack(o_y_off);
-
- TensorOperandUnpacker tensor_operands(_data->arguments);
- auto tensor = tensor_operands.unpack(o_tensor);
-
- assert(dst->format().w == 1);
- assert(x->format().w == 1);
- assert(y->format().w == 1);
- assert(x_off->format().w == 1);
- assert(y_off->format().w == 1);
- assert(dst->format().dt == DataType::Int32);
- assert(x->format().dt == DataType::Int32);
- assert(y->format().dt == DataType::Int32);
- assert(x_off->format().dt == DataType::Int32);
- assert(y_off->format().dt == DataType::Int32);
-
- const std::string width = tensor->component(TensorComponent::W);
- const std::string height = tensor->component(TensorComponent::H);
- const std::string wxh = tensor->component(TensorComponent::WxH);
- /*
- int x_s;
- int y_s;
- x_s = (xi_0 + x_k);
- y_s = (yi_0 + y_k);
- mi_0 = x_s + y_s * width + b * widthxheight;
- mi_0 = select(-1, mi_0, x_s >= 0);
- mi_0 = select(-1, mi_0, y_s >= 0);
- mi_0 = select(-1, mi_0, x_s < width);
- mi_0 = select(-1, mi_0, y_s < height);
- */
- compound_statement_begin();
- declare_tile("_x_s", TileInfo(DataType::Int32));
- declare_tile("_y_s", TileInfo(DataType::Int32));
- auto x_s = operands.unpack(Operand("_x_s"));
- auto y_s = operands.unpack(Operand("_y_s"));
- for(int i = 0; i < dst->format().h; ++i)
- {
- // x_s = (xi_0 + x_k);
- // y_s = (yi_0 + y_k);
- _data->code += x_s->scalar(0, i).str;
- _data->code += " = (";
- _data->code += x->scalar(0, i).str;
- _data->code += " + ";
- _data->code += x_off->scalar(0, i).str;
- _data->code += ");\n";
- _data->code += y_s->scalar(0, i).str;
- _data->code += " = (";
- _data->code += y->scalar(0, i).str;
- _data->code += " + ";
- _data->code += y_off->scalar(0, i).str;
- _data->code += ");\n";
- // mi_0 = x_s + y_s * width;
- _data->code += dst->scalar(0, i).str;
- _data->code += " = ";
- _data->code += x_s->scalar(0, i).str;
- _data->code += " + ";
- _data->code += y_s->scalar(0, i).str;
- _data->code += " * " + width + ";\n";
- // mi_0 = select(-1, mi_0, x_s >= 0);
- _data->code += dst->scalar(0, i).str;
- _data->code += " = select(-1, ";
- _data->code += dst->scalar(0, i).str;
- _data->code += ", ";
- _data->code += x_s->scalar(0, i).str;
- _data->code += " >= 0);\n";
- // mi_0 = select(-1, mi_0, y_s >= 0);
- _data->code += dst->scalar(0, i).str;
- _data->code += " = select(-1, ";
- _data->code += dst->scalar(0, i).str;
- _data->code += ", ";
- _data->code += y_s->scalar(0, i).str;
- _data->code += " >= 0);\n";
- // mi_0 = select(-1, mi_0, x_s < width);
- _data->code += dst->scalar(0, i).str;
- _data->code += " = select(-1, ";
- _data->code += dst->scalar(0, i).str;
- _data->code += ", ";
- _data->code += x_s->scalar(0, i).str;
- _data->code += " < ";
- _data->code += width + ");\n";
- // mi_0 = select(-1, mi_0, y_s < height);
- _data->code += dst->scalar(0, i).str;
- _data->code += " = select(-1, ";
- _data->code += dst->scalar(0, i).str;
- _data->code += ", ";
- _data->code += y_s->scalar(0, i).str;
- _data->code += " < ";
- _data->code += height + ");\n";
- }
- compound_statement_end();
- }
-
-private:
- GpuKernelWriterDataHolder* _data { nullptr };
- GpuKernelWriterAttribute * _attr { nullptr };
-};
-
-/** IGpuKernelWriter factory class */
-class GpuKernelWriterFactory final
-{
-public:
- /** Static method to create the IGpuKernelWriter instance according to the GPU programming language
- *
- * @param[in] attr Kernel writer attributes
- * @param[in] x    Kernel writer data holder providing the target GPU programming language
- *
- * @return IGpuKernelWriter
- */
- static std::unique_ptr<IGpuKernelWriter> create(GpuKernelWriterAttribute *attr, GpuKernelWriterDataHolder *x)
- {
- switch(x->programming_language())
- {
- case GpuTargetLanguage::OpenCL:
- return std::make_unique<ClKernelWriter>(attr, x);
- default:
- std::cout << "Unsupported Gpu programming language" << std::endl;
- assert(false);
- return nullptr;
- }
- }
-};
-
-inline int32_t adjust_step(TensorSamplerFormat tensor_format, int32_t step, const TensorInfo *tensor_info_id, int32_t idx)
-{
- auto tensor = tensor_info_id->shape;
-
- int32_t dim[3] = {0};
-
- switch(tensor_format)
- {
- case TensorSamplerFormat::C_W_H:
- dim[0] = tensor[0];
- dim[1] = tensor[1];
- dim[2] = tensor[2];
- break;
- case TensorSamplerFormat::C_WH_1:
- dim[0] = tensor[0];
- dim[1] = tensor[1] * tensor[2];
- dim[2] = 1;
- break;
- default:
- std::cout << "Unsupported tensor format" << std::endl;
- assert(false);
- break;
- }
-
- return std::min(step, dim[idx]);
-}
-
-} // namespace prototype
-} // namespace ckw
-
-#endif // CKW_SRC_PROTOTYPE_H
diff --git a/compute_kernel_writer/src/TensorOperand.cpp b/compute_kernel_writer/src/TensorOperand.cpp
deleted file mode 100644
index 00ecc3824e..0000000000
--- a/compute_kernel_writer/src/TensorOperand.cpp
+++ /dev/null
@@ -1,247 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ckw/TensorOperand.h"
-#include "ckw/Error.h"
-#include "ckw/Kernel.h"
-#include "ckw/TileOperand.h"
-#include "src/Prototype.h"
-
-namespace ckw
-{
-
-namespace
-{
-
-inline TensorComponentOperand &get_or_create_component(std::unique_ptr<TensorComponentOperand> &ptr, const ::std::string &name, TensorComponent component)
-{
- if(ptr == nullptr)
- {
- ptr = std::make_unique<TensorComponentOperand>(name, component);
- }
-
- return *ptr;
-}
-
-} // namespace
-
-// =================================================================================================
-// TensorOperand
-// =================================================================================================
-
-TensorOperand::TensorOperand(const std::string &name, const TensorInfo &info)
- : OperandBase(name), _info(info)
-{
-}
-
-prototype::Operand TensorOperand::create_impl_operand(prototype::IGpuKernelWriter *writer) const
-{
- CKW_UNUSED(writer);
- return { name() };
-}
-
-const TensorInfo &TensorOperand::info() const
-{
- return _info;
-}
-
-TensorInfo &TensorOperand::info()
-{
- return _info;
-}
-
-DataType TensorOperand::data_type() const
-{
- return _info.data_type();
-}
-
-bool TensorOperand::is_constant() const
-{
- return false;
-}
-
-const TileOperand &TensorOperand::tile() const
-{
- return *_tile;
-}
-
-TileOperand &TensorOperand::tile()
-{
- return *_tile;
-}
-
-TensorOperand &TensorOperand::tile(TileOperand &tile)
-{
- _tile = &tile;
- return *this;
-}
-
-const TensorTileSampler &TensorOperand::tile_sampler() const
-{
- return _tile_sampler;
-}
-
-TensorTileSampler &TensorOperand::tile_sampler()
-{
- return _tile_sampler;
-}
-
-TensorOperand &TensorOperand::tile_sampler(const TensorTileSampler &value)
-{
- _tile_sampler = value;
- return *this;
-}
-
-TileOperand &TensorOperand::stride1()
-{
- return get_or_create_component(_stride1, name(), TensorComponent::Stride1);
-}
-
-TileOperand &TensorOperand::stride2()
-{
- return get_or_create_component(_stride2, name(), TensorComponent::Stride2);
-}
-
-TileOperand &TensorOperand::stride3()
-{
- return get_or_create_component(_stride3, name(), TensorComponent::Stride3);
-}
-
-TileOperand &TensorOperand::stride4()
-{
- return get_or_create_component(_stride4, name(), TensorComponent::Stride4);
-}
-
-TileOperand &TensorOperand::dim0()
-{
- return get_or_create_component(_dim0, name(), TensorComponent::Dim0);
-}
-
-TileOperand &TensorOperand::dim1()
-{
- return get_or_create_component(_dim1, name(), TensorComponent::Dim1);
-}
-
-TileOperand &TensorOperand::dim2()
-{
- return get_or_create_component(_dim2, name(), TensorComponent::Dim2);
-}
-
-TileOperand &TensorOperand::dim3()
-{
- return get_or_create_component(_dim3, name(), TensorComponent::Dim3);
-}
-
-TileOperand &TensorOperand::dim4()
-{
- return get_or_create_component(_dim4, name(), TensorComponent::Dim4);
-}
-
-TileOperand &TensorOperand::dim1_dim2()
-{
- return get_or_create_component(_dim1_dim2, name(), TensorComponent::Dim1xDim2);
-}
-
-TileOperand &TensorOperand::dim1_dim2_dim3()
-{
- return get_or_create_component(_dim1_dim2_dim3, name(), TensorComponent::Dim1xDim2xDim3);
-}
-
-TileOperand &TensorOperand::offset_first_element_in_bytes()
-{
- return get_or_create_component(_offset_first_element_in_bytes, name(), TensorComponent::OffsetFirstElement);
-}
-
-// =================================================================================================
-// TensorComponentOperand
-// =================================================================================================
-
-TensorComponentOperand::TensorComponentOperand(const ::std::string &name, TensorComponent component)
- : TileOperand(name, DataType::Int32), _component(component)
-{
-}
-
-prototype::Operand TensorComponentOperand::create_impl_operand(prototype::IGpuKernelWriter *writer) const
-{
- CKW_UNUSED(writer);
- prototype::OperandType type{ prototype::OperandType::Unknown };
-
- switch(_component)
- {
- case TensorComponent::OffsetFirstElement:
- type = prototype::OperandType::TensorDataOffset;
- break;
-
- case TensorComponent::Stride1:
- type = prototype::OperandType::TensorStride1;
- break;
-
- case TensorComponent::Stride2:
- type = prototype::OperandType::TensorStride2;
- break;
-
- case TensorComponent::Stride3:
- type = prototype::OperandType::TensorStride3;
- break;
-
- case TensorComponent::Stride4:
- type = prototype::OperandType::TensorStride4;
- break;
-
- case TensorComponent::Dim0:
- type = prototype::OperandType::TensorDim0;
- break;
-
- case TensorComponent::Dim1:
- type = prototype::OperandType::TensorDim1;
- break;
-
- case TensorComponent::Dim2:
- type = prototype::OperandType::TensorDim2;
- break;
-
- case TensorComponent::Dim3:
- type = prototype::OperandType::TensorDim3;
- break;
-
- case TensorComponent::Dim4:
- type = prototype::OperandType::TensorDim4;
- break;
-
- case TensorComponent::Dim1xDim2:
- type = prototype::OperandType::TensorDim1xDim2;
- break;
-
- case TensorComponent::Dim1xDim2xDim3:
- type = prototype::OperandType::TensorDim1xDim2xDim3;
- break;
-
- default:
- CKW_ASSERT(false);
- }
-
- return prototype::Operand(name(), type);
-}
-
-} // namespace ckw
diff --git a/compute_kernel_writer/src/TensorTileSampler.cpp b/compute_kernel_writer/src/TensorTileSampler.cpp
deleted file mode 100644
index 143d550dec..0000000000
--- a/compute_kernel_writer/src/TensorTileSampler.cpp
+++ /dev/null
@@ -1,167 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ckw/TensorTileSampler.h"
-#include "ckw/TileOperand.h"
-#include "ckw/Types.h"
-
-namespace ckw
-{
-
-TensorTileSampler::TensorTileSampler()
-{
-}
-
-TensorTileSampler::TensorTileSampler(
- TileOperand &x, TileOperand &y, TileOperand &z, TileOperand &b,
- TensorSamplerFormat format,
- TensorSamplerAddressModeX address_mode_x,
- TensorSamplerAddressModeY address_mode_y,
- TensorSamplerAddressModeZ address_mode_z)
- : _x(&x), _y(&y), _z(&z), _b(&b), _height(0), _width(0), _format(format), _address_mode_x(address_mode_x), _address_mode_y(address_mode_y), _address_mode_z(address_mode_z)
-{
-}
-
-TensorTileSampler::TensorTileSampler(
- TileOperand &x, TileOperand &y, TileOperand &z, TileOperand &b,
- int32_t height, int32_t width,
- TensorSamplerFormat format,
- TensorSamplerAddressModeX address_mode_x,
- TensorSamplerAddressModeY address_mode_y,
- TensorSamplerAddressModeZ address_mode_z)
- : _x(&x), _y(&y), _z(&z), _b(&b), _height(height), _width(width), _format(format), _address_mode_x(address_mode_x), _address_mode_y(address_mode_y), _address_mode_z(address_mode_z)
-{
-}
-
-const TileOperand &TensorTileSampler::x() const
-{
- return *_x;
-}
-
-TensorTileSampler &TensorTileSampler::x(TileOperand &x)
-{
- _x = &x;
- return *this;
-}
-
-const TileOperand &TensorTileSampler::y() const
-{
- return *_y;
-}
-
-TensorTileSampler &TensorTileSampler::y(TileOperand &y)
-{
- _y = &y;
- return *this;
-}
-
-const TileOperand &TensorTileSampler::z() const
-{
- return *_z;
-}
-
-TensorTileSampler &TensorTileSampler::z(TileOperand &z)
-{
- _z = &z;
- return *this;
-}
-
-const TileOperand &TensorTileSampler::b() const
-{
- return *_b;
-}
-
-TensorTileSampler &TensorTileSampler::b(TileOperand &b)
-{
- _b = &b;
- return *this;
-}
-
-int32_t TensorTileSampler::width() const
-{
- return _width;
-}
-
-TensorTileSampler &TensorTileSampler::width(int32_t width)
-{
- _width = width;
- return *this;
-}
-
-int32_t TensorTileSampler::height() const
-{
- return _height;
-}
-
-TensorTileSampler &TensorTileSampler::height(int32_t height)
-{
- _height = height;
- return *this;
-}
-
-TensorSamplerFormat TensorTileSampler::format() const
-{
- return _format;
-}
-
-TensorTileSampler &TensorTileSampler::format(TensorSamplerFormat format)
-{
- _format = format;
- return *this;
-}
-
-TensorSamplerAddressModeX TensorTileSampler::address_mode_x() const
-{
- return _address_mode_x;
-}
-
-TensorTileSampler &TensorTileSampler::address_mode_x(TensorSamplerAddressModeX address_mode_x)
-{
- _address_mode_x = address_mode_x;
- return *this;
-}
-
-TensorSamplerAddressModeY TensorTileSampler::address_mode_y() const
-{
- return _address_mode_y;
-}
-
-TensorTileSampler &TensorTileSampler::address_mode_y(TensorSamplerAddressModeY address_mode_y)
-{
- _address_mode_y = address_mode_y;
- return *this;
-}
-
-TensorSamplerAddressModeZ TensorTileSampler::address_mode_z() const
-{
- return _address_mode_z;
-}
-
-TensorTileSampler &TensorTileSampler::address_mode_z(TensorSamplerAddressModeZ address_mode_z)
-{
- _address_mode_z = address_mode_z;
- return *this;
-}
-
-} // namespace ckw
diff --git a/compute_kernel_writer/src/TileOperand.cpp b/compute_kernel_writer/src/TileOperand.cpp
deleted file mode 100644
index 091947628d..0000000000
--- a/compute_kernel_writer/src/TileOperand.cpp
+++ /dev/null
@@ -1,104 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ckw/TileOperand.h"
-#include "ckw/Error.h"
-#include "src/Prototype.h"
-
-namespace ckw
-{
-
-TileOperand::TileOperand(const std::string &name, const TileInfo &info)
- : OperandBase(name), _info(info), _value{ 0 }, _constant(false)
-{
-}
-
-TileOperand::TileOperand(const std::string &name, DataType data_type)
- : OperandBase(name), _info(TileInfo{ data_type }), _value(0), _constant(false)
-{
-}
-
-TileOperand::TileOperand(const std::string &name, int32_t value)
- : OperandBase(name), _info(TileInfo{ DataType::Int32 }), _value(value), _constant(true)
-{
-}
-
-TileOperand::TileOperand(const std::string &name, float value)
- : OperandBase(name), _info(TileInfo{ DataType::Fp32 }), _value(value), _constant(true)
-{
-}
-
-prototype::Operand TileOperand::create_impl_operand(prototype::IGpuKernelWriter *writer) const
-{
- CKW_UNUSED(writer);
-
- if(_constant)
- {
- switch(_info.data_type())
- {
- case DataType::Int32:
- return prototype::Operand(std::to_string(_value.get<int32_t>()), prototype::OperandType::ScalarInt32);
-
- case DataType::Fp32:
- return prototype::Operand(std::to_string(_value.get<float>()), prototype::OperandType::ScalarFp32);
-
- default:
- CKW_ASSERT(false);
- }
- }
- else
- {
- return prototype::Operand(name(), prototype::OperandType::Tile);
- }
-}
-
-const TileInfo &TileOperand::tile_info() const
-{
- return _info;
-}
-
-DataType TileOperand::data_type() const
-{
- return _info.data_type();
-}
-
-bool TileOperand::is_constant() const
-{
- return _constant;
-}
-
-bool TileOperand::is_scalar() const
-{
- return _info.width() == 1 && _info.height() == 1;
-}
-
-ScalarValue TileOperand::scalar_value() const
-{
- CKW_ASSERT(is_scalar());
- CKW_ASSERT(is_constant());
-
- return _value;
-}
-
-} // namespace ckw
diff --git a/compute_kernel_writer/src/acl/AclComponentArgument.cpp b/compute_kernel_writer/src/acl/AclComponentArgument.cpp
deleted file mode 100644
index 5cb909021e..0000000000
--- a/compute_kernel_writer/src/acl/AclComponentArgument.cpp
+++ /dev/null
@@ -1,97 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "acl/AclComponentArgument.h"
-#include "ckw/Error.h"
-
-AclComponentArgument::AclComponentArgument()
-{
-}
-
-AclComponentArgument::AclComponentArgument(ckw::TensorOperand &tensor)
- : _tensor(&tensor)
-{
-}
-
-AclComponentArgument &AclComponentArgument::init_virtual_tensor(ckw::TileOperand &tile, const ckw::TensorTileSampler &tile_sampler)
-{
- CKW_ASSERT(_tile == nullptr);
-
- _tile = &tile;
- _tile_sampler = tile_sampler;
-
- return *this;
-}
-
-bool AclComponentArgument::has_tensor() const
-{
- return _tensor != nullptr;
-}
-
-ckw::TensorOperand &AclComponentArgument::tensor()
-{
- CKW_ASSERT(_tensor != nullptr);
-
- return *_tensor;
-}
-
-const ckw::TensorOperand &AclComponentArgument::tensor() const
-{
- CKW_ASSERT(_tensor != nullptr);
-
- return *_tensor;
-}
-
-bool AclComponentArgument::has_tile() const
-{
- return _tile != nullptr;
-}
-
-ckw::TileOperand &AclComponentArgument::tile()
-{
- CKW_ASSERT(_tile != nullptr);
-
- return *_tile;
-}
-
-const ckw::TileOperand &AclComponentArgument::tile() const
-{
- CKW_ASSERT(_tile != nullptr);
-
- return *_tile;
-}
-
-ckw::TensorTileSampler &AclComponentArgument::tile_sampler()
-{
- CKW_ASSERT(_tile != nullptr);
-
- return _tile_sampler;
-}
-
-const ckw::TensorTileSampler &AclComponentArgument::tile_sampler() const
-{
- CKW_ASSERT(_tile != nullptr);
-
- return _tile_sampler;
-}
diff --git a/compute_kernel_writer/src/acl/AclKernelWriter.cpp b/compute_kernel_writer/src/acl/AclKernelWriter.cpp
deleted file mode 100644
index a44e798c61..0000000000
--- a/compute_kernel_writer/src/acl/AclKernelWriter.cpp
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "acl/AclKernelWriter.h"
-#include "acl/AclComponentArgument.h"
-#include "ckw/Error.h"
-#include "ckw/TileInfo.h"
-
-AclKernelWriter::AclKernelWriter(ckw::Kernel &kernel)
- : KernelWriter(kernel)
-{
-}
-
-void AclKernelWriter::op_load_once(AclComponentArgument *tensor_or_tile, const ckw::TensorTileSampler &sampler)
-{
- if(!tensor_or_tile->has_tile())
- {
- CKW_ASSERT(tensor_or_tile->has_tensor());
-
- auto &tensor = tensor_or_tile->tensor();
-
- const auto tile_name = tensor.name() + "_tile";
- auto &tile = declare_tile(tile_name.c_str(), ckw::TileInfo(tensor.data_type(), sampler.height(), sampler.width()));
-
- op_load(tile, tensor, sampler);
-
- tensor_or_tile->init_virtual_tensor(tile, sampler);
- }
-}
diff --git a/compute_kernel_writer/src/acl/AclScopedKernelWriter.cpp b/compute_kernel_writer/src/acl/AclScopedKernelWriter.cpp
deleted file mode 100644
index 2a73d47592..0000000000
--- a/compute_kernel_writer/src/acl/AclScopedKernelWriter.cpp
+++ /dev/null
@@ -1,58 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "acl/AclScopedKernelWriter.h"
-#include "acl/AclKernelWriter.h"
-
-AclScopedKernelWriter::AclScopedKernelWriter(AclKernelWriter *writer)
- : _writer(writer), _parent_id_space(writer->id_space())
-{
- _writer->next_id_space();
-}
-
-AclScopedKernelWriter::AclScopedKernelWriter(const AclScopedKernelWriter &other)
- : _writer(other._writer), _parent_id_space(other._writer->id_space())
-{
- _writer->next_id_space();
-}
-
-AclKernelWriter *AclScopedKernelWriter::operator->()
-{
- return _writer;
-}
-
-const AclKernelWriter *AclScopedKernelWriter::operator->() const
-{
- return _writer;
-}
-
-AclKernelWriter *AclScopedKernelWriter::writer()
-{
- return _writer;
-}
-
-const AclKernelWriter *AclScopedKernelWriter::writer() const
-{
- return _writer;
-}