Remove CKW prototype and Template Writer

Gpu code in dynamic fusion is now written by stable CKW. We do not need CKW protoype and the older writer implementation, i.e. TemplateWriter. It also removes the need for the flag -DACL_INTERNAL_TEST_CKW_IN_DF to compile and test dynamic fusion operator. Resolves: COMPMID-6715 Signed-off-by: Gunes Bayir <gunes.bayir@arm.com> Change-Id: I9f9453311e79d9be612bd4754240d832f98503e8 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/11116 Benchmark: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Jakub Sujak <jakub.sujak@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
author: Gunes Bayir <gunes.bayir@arm.com> 2024-02-07 15:34:45 +0000
committer: Gunes Bayir <gunes.bayir@arm.com> 2024-02-09 15:59:45 +0000
commit: 0ee13afc4429411de9a05ba4c2ff8a580784b568 (patch)
tree: c9ee1acf684d52b92ffb7500b0b65eee8377ce45 /compute_kernel_writer
parent: a3e1b50588b89a2c0c67da2679728a422fc16402 (diff)
download: ComputeLibrary-0ee13afc4429411de9a05ba4c2ff8a580784b568.tar.gz
37 files changed, 0 insertions, 9639 deletions
diff --git a/compute_kernel_writer/prototype/CMakeLists.txt b/compute_kernel_writer/prototype/CMakeLists.txt
deleted file mode 100644
index 439dcd3b7e..0000000000
--- a/compute_kernel_writer/prototype/CMakeLists.txt
+++ /dev/null
@@ -1,78 +0,0 @@
-# Copyright (c) 2023 Arm Limited.
-#
-# SPDX-License-Identifier: MIT
-#
-# Permission is hereby granted, free of charge, to any person obtaining a copy
-# of this software and associated documentation files (the "Software"), to
-# deal in the Software without restriction, including without limitation the
-# rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-# sell copies of the Software, and to permit persons to whom the Software is
-# furnished to do so, subject to the following conditions:
-#
-# The above copyright notice and this permission notice shall be included in all
-# copies or substantial portions of the Software.
-#
-# THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-# IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-# FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-# AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-# LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-# OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-# SOFTWARE.
-
-cmake_minimum_required(VERSION 3.14 FATAL_ERROR)
-
-#---------------------------------------------------------------------
-# Prototype
-
-add_library(ckw_prototype
-    src/TileInfo.cpp
-    src/TensorInfo.cpp
-    src/Kernel.cpp
-    src/KernelWriter.cpp
-    src/OperandBase.cpp
-    src/TileOperand.cpp
-    src/TensorOperand.cpp
-    src/TensorTileSampler.cpp
-    src/KernelArgument.cpp
-)
-
-target_compile_options(ckw_prototype
-    PUBLIC
-    ${CKW_CXX_FLAGS}
-    "$<$<CXX_COMPILER_ID:GNU>:${GNU_WARNINGS}>"
-    "$<$<CONFIG:Debug>:${CKW_ASSERTS_OPTS}>"
-    "$<$<BOOL:${CKW_ENABLE_ASSERTS}>:${CKW_ASSERTS_OPTS}>"
-    ${CMAKE_CXX_FLAGS}
-    PRIVATE
-    $<$<CONFIG:Release>:-Os>
-)
-
-target_compile_definitions(ckw_prototype PUBLIC
-    $<$<CONFIG:Debug>:COMPUTE_KERNEL_WRITER_DEBUG_ENABLED>
-    $<$<CONFIG:Debug>:COMPUTE_KERNEL_WRITER_ASSERTS_ENABLED>
-    $<$<BOOL:${CKW_ENABLE_ASSERTS}>:COMPUTE_KERNEL_WRITER_ASSERTS_ENABLED>
-    $<$<BOOL:${CKW_ENABLE_OPENCL}>:COMPUTE_KERNEL_WRITER_OPENCL_ENABLED>
-)
-
-target_include_directories(ckw_prototype
-    PUBLIC ${CMAKE_CURRENT_LIST_DIR}/include
-    PRIVATE ${CMAKE_CURRENT_LIST_DIR}
-)
-
-#---------------------------------------------------------------------
-# Examples
-
-add_library(ckw_prototype_examples_common
-    examples/common/ExampleKernelWriter.cpp
-    examples/common/ExampleScopedKernelWriter.cpp
-    examples/common/ExampleComponentArgument.cpp
-)
-
-target_link_libraries(ckw_prototype_examples_common PUBLIC ckw_prototype)
-
-add_executable(ckw_prototype_examples_add_exp_store examples/add_exp_store.cpp)
-target_link_libraries(ckw_prototype_examples_add_exp_store PUBLIC ckw_prototype_examples_common)
-
-add_executable(writer_helper examples/writer_helper.cpp)
-target_link_libraries(writer_helper PUBLIC ckw_prototype)
diff --git a/compute_kernel_writer/prototype/examples/add_exp_store.cpp b/compute_kernel_writer/prototype/examples/add_exp_store.cpp
deleted file mode 100644
index 2b640ca01b..0000000000
--- a/compute_kernel_writer/prototype/examples/add_exp_store.cpp
+++ /dev/null
@@ -1,206 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ckw/Error.h"
-#include "ckw/KernelArgument.h"
-#include "ckw/KernelWriter.h"
-#include "ckw/TensorOperand.h"
-#include "ckw/TensorTileSampler.h"
-#include "ckw/TileOperand.h"
-
-#include "common/ExampleComponentArgument.h"
-#include "common/ExampleKernelWriter.h"
-#include "common/ExampleScopedKernelWriter.h"
-#include <iostream>
-#include <vector>
-
-using namespace ckw;
-
-TensorTileSampler create_simple_sampler(ExampleScopedKernelWriter writer)
-{
-    TensorTileSampler sampler;
-
-    constexpr int32_t m0 = 4;
-    constexpr int32_t n0 = 4;
-
-    auto &gid_0 = writer->declare_tile("gid_0", DataType::Int32);
-    auto &gid_1 = writer->declare_tile("gid_1", DataType::Int32);
-    auto &gid_2 = writer->declare_tile("gid_2", DataType::Int32);
-
-    auto &const_0 = writer->declare_tile("0", 0);
-
-    writer->op_get_global_id(gid_0, 0);
-    writer->op_get_global_id(gid_1, 1);
-    writer->op_get_global_id(gid_2, 2);
-
-    sampler.x(gid_0);
-    sampler.y(gid_1);
-    sampler.z(const_0);
-    sampler.b(gid_2);
-
-    sampler.width(n0);
-    sampler.height(m0);
-
-    sampler.format(TensorSamplerFormat::C_WH_1);
-    sampler.address_mode_x(TensorSamplerAddressModeX::None);
-    sampler.address_mode_y(TensorSamplerAddressModeY::ClampToBorder);
-    sampler.address_mode_z(TensorSamplerAddressModeZ::Skip);
-
-    return sampler;
-}
-
-void op_binary_elementwise(ExampleScopedKernelWriter writer, std::vector<ExampleComponentArgument *> operands)
-{
-    auto lhs = operands.at(0);
-    auto rhs = operands.at(1);
-    auto dst = operands.at(2);
-
-    // Load the LHS and RHS tile and prepare the tensor sampler.
-    if (!lhs->has_tile() && !rhs->has_tile())
-    {
-        const auto sampler = create_simple_sampler(writer);
-
-        writer->op_load_once(lhs, sampler);
-        writer->op_load_once(rhs, sampler);
-    }
-    else if (lhs->has_tile())
-    {
-        const auto &sampler = lhs->tile_sampler();
-        writer->op_load_once(rhs, sampler);
-    }
-    else
-    {
-        const auto &sampler = rhs->tile_sampler();
-        writer->op_load_once(lhs, sampler);
-    }
-
-    auto       &lhs_tile = lhs->tile();
-    auto       &rhs_tile = rhs->tile();
-    const auto &sampler  = lhs->tile_sampler();
-
-    // Prepare the output tile.
-    if (!dst->has_tile())
-    {
-        auto &tile = writer->declare_tile("dst_tile", lhs_tile.tile_info());
-        dst->init_virtual_tensor(tile, sampler);
-    }
-
-    auto &dst_tile = dst->tile();
-
-    // Perform the operation.
-    writer->op_binary_expression(dst_tile, lhs_tile, BinaryOp::Add, rhs_tile);
-}
-
-void op_exp(ExampleScopedKernelWriter writer, std::vector<ExampleComponentArgument *> operands)
-{
-    auto src = operands.at(0);
-    auto dst = operands.at(1);
-
-    // Load the source tile and prepare the sampler.
-    if (!src->has_tile())
-    {
-        const auto sampler = create_simple_sampler(writer);
-        writer->op_load_once(src, sampler);
-    }
-
-    auto       &src_tile = src->tile();
-    const auto &sampler  = src->tile_sampler();
-
-    // Prepare the output tile.
-    if (!dst->has_tile())
-    {
-        auto &tile = writer->declare_tile("dst_tile", src_tile.tile_info());
-        dst->init_virtual_tensor(tile, sampler);
-    }
-
-    auto &dst_tile = dst->tile();
-
-    // Perform the operation.
-    writer->op_unary_elementwise_function(dst_tile, UnaryFunction::Exp, src_tile);
-}
-
-void op_store(ExampleScopedKernelWriter writer, std::vector<ExampleComponentArgument *> operands)
-{
-    auto src = operands.at(0);
-    auto dst = operands.at(1);
-
-    auto       &src_tile   = src->tile();
-    const auto &sampler    = src->tile_sampler();
-    auto       &dst_tensor = dst->tensor();
-
-    writer->op_store(dst_tensor, src_tile, sampler);
-}
-
-int main()
-{
-    Kernel              kernel("example", GpuTargetLanguage::OpenCL);
-    ExampleKernelWriter root_writer(kernel);
-
-    ExampleScopedKernelWriter writer(&root_writer);
-
-    const TensorInfo src0_info(DataType::Fp32, TensorShape({3, 10, 20, 1, 1}), TensorDataLayout::Nhwc, 0);
-    const TensorInfo src1_info(DataType::Fp32, TensorShape({3, 10, 20, 1, 1}), TensorDataLayout::Nhwc, 1);
-    const TensorInfo dst_info(DataType::Fp32, TensorShape({3, 10, 20, 1, 1}), TensorDataLayout::Nhwc, 2);
-
-    ExampleComponentArgument src0(
-        writer->declare_tensor_argument("src0", src0_info, TensorStorageType::BufferUint8Ptr));
-    ExampleComponentArgument src1(
-        writer->declare_tensor_argument("src1", src1_info, TensorStorageType::BufferUint8Ptr));
-    ExampleComponentArgument dst(writer->declare_tensor_argument("dst", dst_info, TensorStorageType::BufferUint8Ptr));
-
-    ExampleComponentArgument ans;
-
-    op_binary_elementwise(writer, {&src0, &src1, &ans});
-    op_exp(writer, {&ans, &ans});
-    op_store(writer, {&ans, &dst});
-
-    const auto arguments = kernel.arguments();
-
-    std::cout << "\n====================\nArguments:\n====================\n";
-
-    for (auto &arg : arguments)
-    {
-        switch (arg.type())
-        {
-            case ckw::KernelArgument::Type::TensorStorage:
-                std::cout << "* Tensor storage:   ID = " << arg.id() << ", type = " << std::hex << "0x"
-                          << static_cast<uint32_t>(arg.tensor_storage_type()) << std::dec << "\n";
-                break;
-
-            case ckw::KernelArgument::Type::TensorComponent:
-                std::cout << "* Tensor component: ID = " << arg.id() << ", type = " << std::hex << "0x"
-                          << static_cast<uint32_t>(arg.tensor_component_type()) << std::dec << "\n";
-                break;
-
-            default:
-                CKW_ASSERT(false);
-        }
-    }
-
-    std::cout << "\n====================\nCode:\n====================\n";
-    const auto code = root_writer.generate_code();
-    std::cout << code;
-
-    return 0;
-}
diff --git a/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.cpp b/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.cpp
deleted file mode 100644
index 55223dae0e..0000000000
--- a/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.cpp
+++ /dev/null
@@ -1,98 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ExampleComponentArgument.h"
-
-#include "ckw/Error.h"
-
-ExampleComponentArgument::ExampleComponentArgument()
-{
-}
-
-ExampleComponentArgument::ExampleComponentArgument(ckw::TensorOperand &tensor) : _tensor(&tensor)
-{
-}
-
-ExampleComponentArgument &ExampleComponentArgument::init_virtual_tensor(ckw::TileOperand             &tile,
-                                                                        const ckw::TensorTileSampler &tile_sampler)
-{
-    CKW_ASSERT(_tile == nullptr);
-
-    _tile         = &tile;
-    _tile_sampler = tile_sampler;
-
-    return *this;
-}
-
-bool ExampleComponentArgument::has_tensor() const
-{
-    return _tensor != nullptr;
-}
-
-ckw::TensorOperand &ExampleComponentArgument::tensor()
-{
-    CKW_ASSERT(_tensor != nullptr);
-
-    return *_tensor;
-}
-
-const ckw::TensorOperand &ExampleComponentArgument::tensor() const
-{
-    CKW_ASSERT(_tensor != nullptr);
-
-    return *_tensor;
-}
-
-bool ExampleComponentArgument::has_tile() const
-{
-    return _tile != nullptr;
-}
-
-ckw::TileOperand &ExampleComponentArgument::tile()
-{
-    CKW_ASSERT(_tile != nullptr);
-
-    return *_tile;
-}
-
-const ckw::TileOperand &ExampleComponentArgument::tile() const
-{
-    CKW_ASSERT(_tile != nullptr);
-
-    return *_tile;
-}
-
-ckw::TensorTileSampler &ExampleComponentArgument::tile_sampler()
-{
-    CKW_ASSERT(_tile != nullptr);
-
-    return _tile_sampler;
-}
-
-const ckw::TensorTileSampler &ExampleComponentArgument::tile_sampler() const
-{
-    CKW_ASSERT(_tile != nullptr);
-
-    return _tile_sampler;
-}
diff --git a/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.h b/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.h
deleted file mode 100644
index 0e029b1157..0000000000
--- a/compute_kernel_writer/prototype/examples/common/ExampleComponentArgument.h
+++ /dev/null
@@ -1,112 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef CKW_PROTOTYPE_EXAMPLES_COMMON_EXAMPLECOMPONENTARGUMENT_H
-#define CKW_PROTOTYPE_EXAMPLES_COMMON_EXAMPLECOMPONENTARGUMENT_H
-
-#include "ckw/TensorTileSampler.h"
-
-namespace ckw
-{
-class TensorOperand;
-
-class TileOperand;
-} // namespace ckw
-
-/** The argument of a dynamic fusion component which can be either user tensor or virtual tensor. */
-class ExampleComponentArgument
-{
-public:
-    /** Initialize a new instance of @ref ExampleComponentArgument class for empty virtual tensor. */
-    ExampleComponentArgument();
-
-    /** Initialize a new instance of @ref ExampleComponentArgument class for user tensor.
-     *
-     * @param[in] tensor The user tensor.
-     */
-    explicit ExampleComponentArgument(ckw::TensorOperand &tensor);
-
-    /** Set virtual tensor information (tile, sampler) for the argument.
-     *
-     * If the component is a user tensor, it can be treated as virtual tensor as well
-     * and won't be loaded again using @ref ExampleKernelWriter::op_load_once method.
-     *
-     * @param[in] tile    The tile that has been loaded.
-     * @param[in] sampler The tensor sampling information that has been used to load the tile.
-     */
-    ExampleComponentArgument &init_virtual_tensor(ckw::TileOperand &tile, const ckw::TensorTileSampler &sampler);
-
-    /** Get whether the argument is a user tensor. */
-    bool has_tensor() const;
-
-    /** Get the tensor operand.
-     *
-     * If the tensor is not available, throw an error.
-     */
-    ckw::TensorOperand &tensor();
-
-    /** Get the tensor operand.
-     *
-     * If the tensor is not available, throw an error.
-     */
-    const ckw::TensorOperand &tensor() const;
-
-    /** Get whether the argument contains a tile.
-     *
-     * The argument can be either a user tensor that has been loaded,
-     * or a virtual tensor (i.e. a tile with tensor sampling information).
-     */
-    bool has_tile() const;
-
-    /** Get the tile operand.
-     *
-     * If the tile is not available, throw an error.
-     */
-    ckw::TileOperand &tile();
-
-    /** Get the tile operand.
-     *
-     * If the tile is not available, throw an error.
-     */
-    const ckw::TileOperand &tile() const;
-
-    /** Get the tensor sampling information for the tile.
-     *
-     * If the tile is not available, throw an error.
-     */
-    ckw::TensorTileSampler &tile_sampler();
-
-    /** Get the tensor sampling information for the tile.
-     *
-     * If the tile is not available, throw an error.
-     */
-    const ckw::TensorTileSampler &tile_sampler() const;
-
-private:
-    ckw::TensorOperand    *_tensor{nullptr};
-    ckw::TileOperand      *_tile{nullptr};
-    ckw::TensorTileSampler _tile_sampler{};
-};
-
-#endif // CKW_PROTOTYPE_EXAMPLES_COMMON_EXAMPLECOMPONENTARGUMENT_H
diff --git a/compute_kernel_writer/prototype/examples/common/ExampleKernelWriter.cpp b/compute_kernel_writer/prototype/examples/common/ExampleKernelWriter.cpp
deleted file mode 100644
index 1734ce8823..0000000000
--- a/compute_kernel_writer/prototype/examples/common/ExampleKernelWriter.cpp
+++ /dev/null
@@ -1,52 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ExampleKernelWriter.h"
-
-#include "ckw/Error.h"
-#include "ckw/TileInfo.h"
-
-#include "ExampleComponentArgument.h"
-
-ExampleKernelWriter::ExampleKernelWriter(ckw::Kernel &kernel) : KernelWriter(kernel)
-{
-}
-
-void ExampleKernelWriter::op_load_once(ExampleComponentArgument *tensor_or_tile, const ckw::TensorTileSampler &sampler)
-{
-    if (!tensor_or_tile->has_tile())
-    {
-        CKW_ASSERT(tensor_or_tile->has_tensor());
-
-        auto &tensor = tensor_or_tile->tensor();
-
-        const auto tile_name = tensor.name() + "_tile";
-        auto      &tile =
-            declare_tile(tile_name.c_str(), ckw::TileInfo(tensor.data_type(), sampler.height(), sampler.width()));
-
-        op_load(tile, tensor, sampler);
-
-        tensor_or_tile->init_virtual_tensor(tile, sampler);
-    }
-}
diff --git a/compute_kernel_writer/prototype/examples/common/ExampleKernelWriter.h b/compute_kernel_writer/prototype/examples/common/ExampleKernelWriter.h
deleted file mode 100644
index 1528c3d933..0000000000
--- a/compute_kernel_writer/prototype/examples/common/ExampleKernelWriter.h
+++ /dev/null
@@ -1,56 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef CKW_PROTOTYPE_EXAMPLES_COMMON_EXAMPLEKERNELWRITER_H
-#define CKW_PROTOTYPE_EXAMPLES_COMMON_EXAMPLEKERNELWRITER_H
-
-#include "ckw/KernelWriter.h"
-#include "ckw/TensorTileSampler.h"
-
-class ExampleComponentArgument;
-
-namespace ckw
-{
-class Kernel;
-} // namespace ckw
-
-/** Extended implementation of kernel writer for dynamic fusion. */
-class ExampleKernelWriter : public ckw::KernelWriter
-{
-public:
-    /** Initialize a new instance of @ref ExampleKernelWriter class.
-     *
-     * @param[in] kernel The kernel to be generated.
-     */
-    explicit ExampleKernelWriter(ckw::Kernel &kernel);
-
-    /** Load the user tensor to the tile in the same component argument if it hasn't been loaded.
-     *
-     * @param[in] tensor_or_tile The component argument that is either a user tensor or a virtual tensor.
-     * @param[in] sampler        The tensor sampling information to load the tile.
-     */
-    void op_load_once(ExampleComponentArgument *tensor_or_tile, const ckw::TensorTileSampler &sampler);
-};
-
-#endif // CKW_PROTOTYPE_EXAMPLES_COMMON_EXAMPLEKERNELWRITER_H
diff --git a/compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.cpp b/compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.cpp
deleted file mode 100644
index 784d5ffb96..0000000000
--- a/compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.cpp
+++ /dev/null
@@ -1,59 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ExampleScopedKernelWriter.h"
-
-#include "ExampleKernelWriter.h"
-
-ExampleScopedKernelWriter::ExampleScopedKernelWriter(ExampleKernelWriter *writer)
-    : _writer(writer), _parent_id_space(writer->id_space())
-{
-    _writer->next_id_space();
-}
-
-ExampleScopedKernelWriter::ExampleScopedKernelWriter(const ExampleScopedKernelWriter &other)
-    : _writer(other._writer), _parent_id_space(other._writer->id_space())
-{
-    _writer->next_id_space();
-}
-
-ExampleKernelWriter *ExampleScopedKernelWriter::operator->()
-{
-    return _writer;
-}
-
-const ExampleKernelWriter *ExampleScopedKernelWriter::operator->() const
-{
-    return _writer;
-}
-
-ExampleKernelWriter *ExampleScopedKernelWriter::writer()
-{
-    return _writer;
-}
-
-const ExampleKernelWriter *ExampleScopedKernelWriter::writer() const
-{
-    return _writer;
-}
diff --git a/compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.h b/compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.h
deleted file mode 100644
index 4655b1897e..0000000000
--- a/compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef CKW_PROTOTYPE_EXAMPLES_COMMON_EXAMPLESCOPEDKERNELWRITER_H
-#define CKW_PROTOTYPE_EXAMPLES_COMMON_EXAMPLESCOPEDKERNELWRITER_H
-
-#include <cstdint>
-
-class ExampleKernelWriter;
-
-/** Helper to automatically manage kernel writer ID space. */
-class ExampleScopedKernelWriter
-{
-public:
-    /** Initialize a new instance of @ref ExampleScopedKernelWriter class. */
-    explicit ExampleScopedKernelWriter(ExampleKernelWriter *writer);
-
-    /** Create a new scope from the specified scoped kernel writer. */
-    ExampleScopedKernelWriter(const ExampleScopedKernelWriter &other);
-
-    /** Assignment is disallowed. */
-    ExampleScopedKernelWriter &operator=(const ExampleScopedKernelWriter &) = delete;
-
-    /** Access the underlying kernel writer. */
-    ExampleKernelWriter *operator->();
-
-    /** Access the underlying kernel writer. */
-    const ExampleKernelWriter *operator->() const;
-
-    /** Get the kernel writer. */
-    ExampleKernelWriter *writer();
-
-    /** Get the kernel writer. */
-    const ExampleKernelWriter *writer() const;
-
-private:
-    ExampleKernelWriter *_writer;
-    int32_t              _parent_id_space;
-};
-
-#endif // CKW_PROTOTYPE_EXAMPLES_COMMON_EXAMPLESCOPEDKERNELWRITER_H
diff --git a/compute_kernel_writer/prototype/examples/writer_helper.cpp b/compute_kernel_writer/prototype/examples/writer_helper.cpp
deleted file mode 100644
index 8623afbf50..0000000000
--- a/compute_kernel_writer/prototype/examples/writer_helper.cpp
+++ /dev/null
@@ -1,113 +0,0 @@
-/*
-* Copyright (c) 2023 Arm Limited.
-*
-* SPDX-License-Identifier: MIT
-*
-* Permission is hereby granted, free of charge, to any person obtaining a copy
-* of this software and associated documentation files (the "Software"), to
-* deal in the Software without restriction, including without limitation the
-* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-* sell copies of the Software, and to permit persons to whom the Software is
-* furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in all
-* copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*/
-
-#include "ckw/KernelWriter.h"
-#include "ckw/TensorTileSampler.h"
-
-#include "../include/ckw/KernelWriterHelper.h"
-#include <iostream>
-
-using namespace ckw;
-
-TensorTileSampler create_simple_sampler(KernelWriter &writer)
-{
-    TensorTileSampler sampler;
-
-    constexpr int32_t m0 = 1;
-    constexpr int32_t n0 = 1;
-
-    auto &gid_0 = writer.declare_tile("gid_0", DataType::Int32);
-    auto &gid_1 = writer.declare_tile("gid_1", DataType::Int32);
-    auto &gid_2 = writer.declare_tile("gid_2", DataType::Int32);
-
-    auto &const_0 = writer.declare_tile("0", 0);
-
-    writer.op_get_global_id(gid_0, 0);
-    writer.op_get_global_id(gid_1, 1);
-    writer.op_get_global_id(gid_2, 2);
-
-    sampler.x(gid_0);
-    sampler.y(gid_1);
-    sampler.z(gid_2);
-    sampler.b(const_0);
-
-    sampler.width(n0);
-    sampler.height(m0);
-
-    sampler.format(TensorSamplerFormat::C_WH_1);
-    sampler.address_mode_x(TensorSamplerAddressModeX::None);
-    sampler.address_mode_y(TensorSamplerAddressModeY::ClampToBorder);
-    sampler.address_mode_z(TensorSamplerAddressModeZ::Skip);
-
-    return sampler;
-}
-
-int main()
-{
-    Kernel                           kernel("test", GpuTargetLanguage::OpenCL);
-    KernelWriterHelper<KernelWriter> writer(kernel);
-
-    const TensorInfo src_info(DataType::Fp32, TensorShape({1, 1, 1, 1, 1}), TensorDataLayout::Nhwc, 0);
-    const TensorInfo dst_info(DataType::Fp32, TensorShape({1, 1, 1, 1, 1}), TensorDataLayout::Nhwc, 1);
-
-    auto &src_tensor = writer.declare_tensor_argument("src", src_info);
-    auto &dst_tensor = writer.declare_tensor_argument("dst", dst_info);
-
-    const auto sampler = create_simple_sampler(writer);
-
-    auto &src = writer.declare_tile("src_tile", TileInfo(src_tensor.data_type(), sampler.height(), sampler.width()));
-    auto &other =
-        writer.declare_tile("other_tile", TileInfo(src_tensor.data_type(), sampler.height(), sampler.width()));
-    auto &dst = writer.declare_tile("dst_tile", TileInfo(src_tensor.data_type(), sampler.height(), sampler.width()));
-
-    writer.op_load(src, src_tensor, sampler);
-    writer.op_load(other, src_tensor, sampler);
-    writer.op_load(dst, dst_tensor, sampler);
-
-    auto test       = dst ^ src ^ other;
-    auto other_test = logical_and(dst, src, other);
-    writer.op_assign(dst, logical_and(dst, src, other));
-    writer.op_assign(dst, test);
-    writer.op_assign(dst, other_test);
-    writer.op_assign(dst, operator^(operator^(dst, src), other));
-
-    writer.op_if(exp(src) == dst, [&] { writer.op_binary_expression(dst, src, BinaryOp::Add, src); })
-        .op_else_if(exp(src) > dst, [&] { writer.op_binary_expression(dst, src, BinaryOp::Add, src); })
-        .op_else([&] { writer.op_assign(dst, src); });
-
-    writer.op_assign(dst, src + src * src);
-    writer.op_assign(dst, src * max(src, dst) + src);
-    writer.op_assign(dst, src * select(src, dst, src) + src);
-
-    writer.op_assign(dst, src ^ dst);
-    writer.op_assign(dst, ~src);
-
-    writer.op_for_loop(dst < src, dst += src, [&] { writer.op_assign(dst, src + dst); });
-
-    writer.op_assign(dst += src);
-    writer.op_assign(dst += exp(src));
-
-    std::cout << "======== KERNEL ========" << std::endl;
-    std::cout << writer.generate_code() << std::endl;
-}
diff --git a/compute_kernel_writer/prototype/include/ckw/Error.h b/compute_kernel_writer/prototype/include/ckw/Error.h
deleted file mode 100644
index aab713c817..0000000000
--- a/compute_kernel_writer/prototype/include/ckw/Error.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef CKW_PROTOTYPE_INCLUDE_CKW_ERROR_H
-#define CKW_PROTOTYPE_INCLUDE_CKW_ERROR_H
-
-#include <stdexcept>
-#include <string>
-
-namespace ckw
-{
-
-/** If the condition is not met, throw an std::runtime_error with the specified message.
- *
- * @param[in] cond The condition that is expected to be true.
- * @param[in] msg  The error message when the condition is not met.
- */
-#define CKW_ASSERT_MSG(cond, msg)            \
-    do                                       \
-    {                                        \
-        if (!(cond))                         \
-        {                                    \
-            throw ::std::runtime_error(msg); \
-        }                                    \
-    } while (false)
-
-/** If the condition is not met, throw an std::runtime_error.
- *
- * @param[in] cond The condition that is expected to be true.
- */
-#define CKW_ASSERT(cond) CKW_ASSERT_MSG(cond, #cond)
-
-/** If the precondition is met but the consequence is not met, throw an std::runtime_error.
- *
- * @param[in] precond The condition if is met requires the consequence must also be met.
- * @param[in] cond    The condition that is expected to be true if the precondition is true.
- */
-#define CKW_ASSERT_IF(precond, cond) CKW_ASSERT_MSG(!(precond) || ((precond) && (cond)), #precond " |-> " #cond)
-
-/** Mark the variables as unused.
- *
- * @param[in] ... Variables which are unused.
- */
-#define CKW_UNUSED(...) ::ckw::ignore_unused(__VA_ARGS__) // NOLINT
-
-/** Mark the variables as unused.
- *
- * @param[in] ... Variables which are unused.
- */
-template <typename... T>
-inline void ignore_unused(T &&...)
-{
-}
-
-} // namespace ckw
-
-#endif // CKW_INCLUDE_CKW_ERROR_H
diff --git a/compute_kernel_writer/prototype/include/ckw/Kernel.h b/compute_kernel_writer/prototype/include/ckw/Kernel.h
deleted file mode 100644
index ba31a29ba7..0000000000
--- a/compute_kernel_writer/prototype/include/ckw/Kernel.h
+++ /dev/null
@@ -1,102 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef CKW_PROTOTYPE_INCLUDE_CKW_KERNEL_H
-#define CKW_PROTOTYPE_INCLUDE_CKW_KERNEL_H
-
-#include "ckw/KernelArgument.h"
-#include "ckw/OperandBase.h"
-#include "ckw/types/GpuTargetLanguage.h"
-
-#include <map>
-#include <memory>
-#include <string>
-#include <vector>
-
-namespace ckw
-{
-
-class TileOperand;
-
-namespace prototype
-{
-class GpuKernelWriterDataHolder;
-} // namespace prototype
-
-/** The target for kernel writer to write into. */
-class Kernel
-{
-public:
-    /** Constructor
-     *
-     * @param[in] language The programming language to write the kernel.
-     */
-    Kernel(GpuTargetLanguage language);
-    /** Constructor
-     *
-     * @param[in] name     The name of the kernel function.
-     * @param[in] language The programming language to write the kernel.
-     */
-    Kernel(const char *name, GpuTargetLanguage language);
-
-    /** Destructor */
-    ~Kernel();
-
-    /** Get the name of the kernel function. */
-    const std::string &name() const;
-
-    /** Set the name of the kernel function.
-     *
-     * @param[in] name     The name of the kernel function.
-     */
-    void name(const std::string &name);
-
-    /** Get the list of kernel arguments. */
-    ::std::vector<KernelArgument> arguments() const;
-
-    /** (Internal use only) Register the tile operand.
-     *
-     * @param operand The tile operand to be registered.
-     */
-    TileOperand &register_operand(::std::unique_ptr<TileOperand> operand);
-
-    /** (Internal use only) Register the tensor operand.
-     *
-     * @param operand The tensor operand to be registered.
-     */
-    TensorOperand &register_operand(::std::unique_ptr<TensorOperand> operand);
-
-    /** (Internal use only) Get the implementation data. */
-    prototype::GpuKernelWriterDataHolder *impl();
-
-private:
-    ::std::string                                             _name;
-    ::std::unique_ptr<prototype::GpuKernelWriterDataHolder>   _kernel;
-    ::std::map<::std::string, ::std::unique_ptr<OperandBase>> _operands;
-    ::std::map<int32_t, TensorOperand *>                      _tensor_id_operands;
-};
-
-} // namespace ckw
-
-#endif // CKW_PROTOTYPE_INCLUDE_CKW_KERNEL_H
diff --git a/compute_kernel_writer/prototype/include/ckw/KernelArgument.h b/compute_kernel_writer/prototype/include/ckw/KernelArgument.h
deleted file mode 100644
index 3384a20aef..0000000000
--- a/compute_kernel_writer/prototype/include/ckw/KernelArgument.h
+++ /dev/null
@@ -1,107 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef CKW_PROTOTYPE_INCLUDE_CKW_KERNELARGUMENT_H
-#define CKW_PROTOTYPE_INCLUDE_CKW_KERNELARGUMENT_H
-
-#include "ckw/TensorInfo.h"
-
-#include <cstdint>
-
-namespace ckw
-{
-
-class TensorOperand;
-class TensorComponentOperand;
-
-/** A kernel argument which can be either a tensor storage or a tensor component. */
-class KernelArgument
-{
-public:
-    /** The type of kernel argument. */
-    enum class Type : int32_t
-    {
-        /** The argument that provides the read and/or write access to the tensor data.
-         *
-         * See @ref ckw::TensorStorage to see the list of supported storage type.
-         */
-        TensorStorage,
-
-        /** The argument that provides extra information about the tensor.
-         *
-         * See @ref ckw::TensorComponent to see the list of supported component.
-         */
-        TensorComponent,
-    };
-
-    /** Initialize a new instance of kernel argument class for a tensor storage argument.
-     *
-     * @param[in] tensor The tensor whose storage is exposed to kernel arguments.
-     */
-    KernelArgument(TensorOperand &tensor);
-
-    /** Initialize a new instance of kernel argument class for a tensor component argument.
-     *
-     * @param[in] tensor_component The tensor component to be exposed to kernel arguments.
-     */
-    KernelArgument(TensorComponentOperand &tensor_component);
-
-    /** Get the type of kernel argument. */
-    Type type() const;
-
-    /** Get the argument ID.
-     *
-     * This method can be used to get the tensor info ID of both tensor storage and tensor component arguments.
-     */
-    int32_t id() const;
-
-    /** Get the type of tensor storage.
-     *
-     * This method can only be used for tensor storage argument.
-     */
-    TensorStorageType tensor_storage_type() const;
-
-    /** Get the tensor component type.
-     *
-     * This method can only be used for tensor component argument.
-     */
-    TensorComponentType tensor_component_type() const;
-
-private:
-    Type    _type;
-    int32_t _id;
-
-    union SubId
-    {
-        int32_t             unknown;
-        TensorStorageType   tensor_storage_type;
-        TensorComponentType tensor_component_type;
-    };
-
-    SubId _sub_id{0};
-};
-
-} // namespace ckw
-
-#endif // CKW_PROTOTYPE_INCLUDE_CKW_KERNELARGUMENT_H
diff --git a/compute_kernel_writer/prototype/include/ckw/KernelWriter.h b/compute_kernel_writer/prototype/include/ckw/KernelWriter.h
deleted file mode 100644
index f9e0066f91..0000000000
--- a/compute_kernel_writer/prototype/include/ckw/KernelWriter.h
+++ /dev/null
@@ -1,338 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef CKW_PROTOTYPE_INCLUDE_CKW_KERNELWRITER_H
-#define CKW_PROTOTYPE_INCLUDE_CKW_KERNELWRITER_H
-
-#include "ckw/Kernel.h"
-#include "ckw/TensorInfo.h"
-#include "ckw/TensorOperand.h"
-#include "ckw/TileInfo.h"
-#include "ckw/TileOperand.h"
-#include "ckw/types/ConvertPolicy.h"
-#include "ckw/types/Functions.h"
-#include "ckw/types/Operators.h"
-
-#include <memory>
-
-namespace ckw
-{
-
-namespace prototype
-{
-struct GpuKernelWriterAttribute;
-
-class IGpuKernelWriter;
-} // namespace prototype
-
-/** Kernel writer. */
-class KernelWriter
-{
-public:
-    // =============================================================================================
-    // Constructors and destructor
-    // =============================================================================================
-
-    /** Initialize a new instance of kernel writer.
-     *
-     * @param[in] kernel The kernel to be written to.
-     */
-    explicit KernelWriter(Kernel &kernel);
-
-    /** Destructor */
-    ~KernelWriter();
-
-    /** No copy constructor. */
-    KernelWriter(const KernelWriter &) = delete;
-
-    /** No copy assignment. */
-    KernelWriter &operator=(const KernelWriter &) = delete;
-
-    // =============================================================================================
-    // Scope management
-    // =============================================================================================
-
-    /** Get the current ID space. */
-    int32_t id_space() const;
-
-    /** Set the current ID space. */
-    KernelWriter &id_space(int32_t id_space);
-
-    /** Switch to and return a new ID space. */
-    int32_t next_id_space();
-
-    // =============================================================================================
-    // Tensor and tile declaration
-    // =============================================================================================
-
-    /** Declare a tensor argument.
-     *
-     * @param[in] name         The name of the tensor.
-     * @param[in] info         The tensor info.
-     * @param[in] storage_type The tensor storage type.
-     *
-     * @return The @ref TensorOperand object.
-     */
-    TensorOperand &declare_tensor_argument(const std::string &name,
-                                           const TensorInfo  &info,
-                                           TensorStorageType  storage_type = TensorStorageType::BufferUint8Ptr);
-
-    /** Declare a compile-time constant scalar argument.
-     *
-     * @param[in] name  The name of the tile.
-     * @param[in] value The value of the tile.
-     *
-     * @return The @ref TileOperand object.
-     */
-    TileOperand &declare_tile_argument(const std::string &name, int32_t value);
-
-    /** Declare a new tile.
-     *
-     * The name of the tile must be unique in the current ID space.
-     *
-     * @param[in] name The name of the tile.
-     * @param[in] ...  The necessary arguments to create a new @ref TileOperand.
-     *
-     * @return The @ref TileOperand object.
-     */
-    template <typename... TArgs>
-    TileOperand &declare_tile(const std::string &name, TArgs &&...args)
-    {
-        const auto var_name = generate_variable_name(name);
-        auto       operand  = std::make_unique<TileOperand>(var_name, ::std::forward<TArgs>(args)...);
-
-        return declare_tile_operand(std::move(operand));
-    }
-
-    // =============================================================================================
-    // Load and store
-    // =============================================================================================
-
-    /** Load the data from the tensor memory to the tile using the sampling information.
-     *
-     * @param[out] tile       The tile to be loaded.
-     * @param[in]  tensor     The tensor to be read.
-     * @param[in]  sampler    The tensor sampling information.
-     * @param[in]  dilation_y Dilation in the Y dimension.
-     */
-    void op_load(TileOperand             &tile,
-                 const TensorOperand     &tensor,
-                 const TensorTileSampler &sampler,
-                 const TileOperand       &dilation_y = TileOperand("dil_y", 1));
-
-    /** Load the data from the tensor memory to the tile using the indirect buffer approach and respective of the sampling information.
-     *
-     * @param[out] tile    The tile to be loaded.
-     * @param[in]  tensor  The tensor to be read.
-     * @param[in]  sampler The tensor sampling information.
-     */
-    void op_load_indirect(TileOperand &tile, const TensorOperand &tensor, const TensorTileSampler &sampler);
-
-    /** Construct an indirection buffer in @p tile containing the precalculated addresses of elements in the source tensor.
-     *
-     * @param[out] tile    The tile to be loaded.
-     * @param[in]  tensor  The tensor the be read.
-     * @param[in]  sampler The tensor sampling information.
-     * @param[in]  x       The X coordinate.
-     * @param[in]  y       The Y coordinate.
-     * @param[in]  x_off   Offset in the X dimension.
-     * @param[in]  y_off   Offset in the Y dimension.
-     */
-    void util_get_indirect_buffer(TileOperand             &tile,
-                                  const TensorOperand     &tensor,
-                                  const TensorTileSampler &sampler,
-                                  const TileOperand       &x,
-                                  const TileOperand       &y,
-                                  const TileOperand       &x_off,
-                                  const TileOperand       &y_off);
-
-    /** Store the tile to the tensor using the specified sampling information.
-     *
-     * @param[out] dst     The tensor that the tile is written to.
-     * @param[in]  src     The tile to be stored.
-     * @param[in]  sampler The tensor sampling information.
-     */
-    void op_store(TensorOperand &tensor, const TileOperand &tile, const TensorTileSampler &sampler);
-
-    // =============================================================================================
-    // Data processing
-    // =============================================================================================
-
-    /** Write assignment: `<dst> = <src>;`.
-     *
-     * @param[out] dst The destination tile.
-     * @param[in]  src The source tile.
-     */
-    void op_assign(const TileOperand &dst, const TileOperand &src);
-
-    /** Write the cast: `<dst> = convert_<dst.type><_sat>(<src>);`.
-     *
-     * @param[out] dst      The destination tile.
-     * @param[in]  src      The source tile.
-     * @param[in]  policy   The policy governing the behavior of the cast.
-     */
-    void op_cast_expression(const TileOperand &dst, const TileOperand &src, ConvertPolicy policy);
-
-    /** Write the unary expression: `<dst> = <op> <src>`.
-     *
-     * @param[out]  dst The destination tile.
-     * @param[in]   op  The unary operator.
-     * @param[in]   src The source tile.
-     */
-    void op_unary_expression(const TileOperand &dst, UnaryOp op, const TileOperand &src);
-
-    /** Write binary expression: `<dst> = <lhs> <op> <rhs>;`.
-     *
-     * @param[out] dst  The destination tile.
-     * @param[in]  lhs  The LHS tile.
-     * @param[in]  op   The binary operator.
-     * @param[in]  rhs  The RHS tile.
-     */
-    void op_binary_expression(const TileOperand &dst, const TileOperand &lhs, BinaryOp op, const TileOperand &rhs);
-
-    /** Write function applied to scalar value: `<dst> = <func>(<src>);`.
-     *
-     * @param[out] dst  The destination tile.
-     * @param[in]  func The function to be applied to the source tile.
-     * @param[in]  src  The source tile.
-     */
-    void op_unary_elementwise_function(const TileOperand &dst, UnaryFunction func, const TileOperand &src);
-
-    /** Write function applied to scalar value: `<dst> = <func>(<first>, <second>);`.
-     *
-     * @param[out] dst      The destination tile.
-     * @param[in]  func     The function to be applied to the source tiles.
-     * @param[in]  first    The first argument tile.
-     * @param[in]  second   The second argument tile.
-     */
-    void op_binary_elementwise_function(const TileOperand &dst,
-                                        BinaryFunction     func,
-                                        const TileOperand &first,
-                                        const TileOperand &second);
-
-    /** Write function applied to scalar value: `<dst> = <func>(<first>, <second>, <third>);`.
-     *
-     * @param[out] dst      The destination tile.
-     * @param[in]  func     The function to be applied to the source tiles.
-     * @param[in]  first    The first argument tile.
-     * @param[in]  second   The second argument tile.
-     * @param[in]  third    The third argument tile.
-     */
-    void op_ternary_elementwise_function(const TileOperand &dst,
-                                         TernaryFunction    func,
-                                         const TileOperand &first,
-                                         const TileOperand &second,
-                                         const TileOperand &third);
-
-    /** Write if-statement: `if(<lhs> <op> <rhs>) { <body> }`.
-     *
-     * @param[in] lhs   The LHS tile of the condition.
-     * @param[in] op    The relational binary operator.
-     * @param[in] rhs   The RHS tile of the condition.
-     * @param[in] body  The body of the if-statement.
-     */
-    void op_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body);
-
-    /** Write else-if-statement: `else if(<lhs> <op> <rhs>) { <body> }`.
-     *
-     * @param[in] lhs   The LHS tile of the condition.
-     * @param[in] op    The relational binary operator.
-     * @param[in] rhs   The RHS tile of the condition.
-     * @param[in] body  The body of the else-if-statement.
-     */
-    void op_else_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body);
-
-    /** Write an else-statement: `else { <body> }`.
-     *
-     * @param[in] body The body of the else-statement.
-     */
-    void op_else(const std::function<void()> &body);
-
-    /** Write for-loops: `for(; <var> <cond_op> <cond_value>; <var> <update_op> <update_value>) { body }`.
-     *
-     * @param[in]       var_name          The name of the variable used in condition.
-     * @param[in]       cond_op           The relational binary operator used in condition.
-     * @param[in]       cond_value_name   The value which the variable is compared against.
-     * @param[in]       update_var_name   The name of the variable which is updated.
-     * @param[in]       update_op         The assignment operator used for updating the update value.
-     * @param[in, out]  update_value      The value which is updated at every iteration.
-     * @param[in]       body              The body of the for-loop.
-     */
-    void op_for_loop(const TileOperand           &var_name,
-                     BinaryOp                     cond_op,
-                     const TileOperand           &cond_value_name,
-                     const TileOperand           &update_var_name,
-                     AssignmentOp                 update_op,
-                     const TileOperand           &update_value_name,
-                     const std::function<void()> &body);
-
-    /** Write the return statement: `return;`
-     */
-    void op_return();
-
-    // =============================================================================================
-    // Misc
-    // =============================================================================================
-
-    /** Set `dst` the global ID of dimension `dim`.
-     *
-     * @param[out] dst The tile to be written to.
-     * @param[in]  dim The global ID dimension.
-     */
-    void op_get_global_id(const TileOperand &dst, int32_t dim);
-
-    // =============================================================================================
-    // Code generation
-    // =============================================================================================
-
-    /** Generate the source code of the kernel. */
-    ::std::string generate_code();
-
-private:
-    /** Generate the full variable name based on the original name and the ID space.
-     *
-     * @param[in] name The name of the variable.
-     *
-     * @return The full variable name.
-     */
-    ::std::string generate_variable_name(const std::string &name) const;
-
-    /** Declare the tile operand.
-     *
-     * @param[in] operand   The tile operand to be declared.
-     */
-    TileOperand &declare_tile_operand(std::unique_ptr<TileOperand> operand);
-
-private:
-    Kernel                                                *_kernel;
-    ::std::unique_ptr<prototype::GpuKernelWriterAttribute> _impl_attr;
-    ::std::unique_ptr<prototype::IGpuKernelWriter>         _impl;
-
-    int32_t _id_space{0};
-    int32_t _max_id_space{0};
-};
-
-} // namespace ckw
-
-#endif // CKW_PROTOTYPE_INCLUDE_CKW_KERNELWRITER_H
diff --git a/compute_kernel_writer/prototype/include/ckw/KernelWriterHelper.h b/compute_kernel_writer/prototype/include/ckw/KernelWriterHelper.h
deleted file mode 100644
index 3ba079bbc2..0000000000
--- a/compute_kernel_writer/prototype/include/ckw/KernelWriterHelper.h
+++ /dev/null
@@ -1,1286 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef CKW_INCLUDE_CKW_KERNELWRITERHELPER_H
-#define CKW_INCLUDE_CKW_KERNELWRITERHELPER_H
-
-#include "ckw/KernelWriter.h"
-#include "ckw/TensorOperand.h"
-#include "ckw/TileOperand.h"
-
-#include <iostream>
-#include <type_traits>
-
-/*
- * By including this header file you will be able to supplement the default
- * Compute Kernel Writer API with additional syntax to help ease the use of CKW.
- *
- * To use the KernelWriterHelper you need to wrap your instance of KernelWriter
- * (or any class deriving from KernelWriter):
- *      KernelWriterHelper<KernelWriter> writer;
- * The resulting writer object comprises the original KernelWriter
- * functionality (drop-in replacement), but extends the syntax as follows.
- *
- * Common functions/operators have natural syntax:
- *  1. Unary expressions:
- *          writer.op_assign(dst, !src);        // Logical NOT
- *          writer.op_assign(dst, ~src);        // Bitwise NOT
- *
- *  2. Binary expressions:
- *          writer.op_assign(dst, lhs + rhs);   // Addition
- *          writer.op_assign(dst, lhs - rhs);   // Subtraction
- *          writer.op_assign(dst, lhs * rhs);   // Multiplication
- *          writer.op_assign(dst, lhs / rhs);   // Division
- *          writer.op_assign(dst, lhs % rhs);   // Modulo
- *          writer.op_assign(dst, lhs == rhs);  // Equality
- *          writer.op_assign(dst, lhs < rhs);   // Less-than
- *          writer.op_assign(dst, lhs <= rhs);  // Less-than-or-equal
- *          writer.op_assign(dst, lhs > rhs);   // Greater-than
- *          writer.op_assign(dst, lhs >= rhs);  // Greater-than-or-equal
- *          writer.op_assign(dst, lhs ^ rhs);   // Bitwise XOR
- *          writer.op_assign(dst, logical_and(lhs, rhs));  // Logical AND
- *          writer.op_assign(dst, logical_or(lhs, rhs));   // Logical OR
- *
- *  3. Unary elementwise functions:
- *          writer.op_assign(dst, exp(src));    // Exponent
- *          writer.op_assign(dst, tanh(src));   // Hyperbolic tangent
- *          writer.op_assign(dst, sqrt(src));   // Square root
- *          writer.op_assign(dst, erf(src));    // Error function
- *          writer.op_assign(dst, fabs(src));   // Absolute of floating-point number
- *          writer.op_assign(dst, log(src));    // Natural logarithm
- *          writer.op_assign(dst, round(src));  // Round
- *          writer.op_assign(dst, sizeOf(src)); // sizeof
- *
- *  4. Binary elementwise functions:
- *          writer.op_assign(dst, max(first, second));      // Max
- *          writer.op_assign(dst, min(first, second));      // Min
- *
- *  5. Ternary elementwise functions:
- *          writer.op_assign(dst, select(first, second, third));    // Select
- *
- * NOTE: All the above examples support nesting, so you could write
- * something like: writer.op_assign(dst, src * (log(arg) + sqrt(abs(arg)));
- *
- *
- *  6. If-statements. The preceding syntax also allows easier writing of if-statements:
- *          writer.op_if(<cond>, <body>);
- *
- *     For example:
- *          writer.op_if(exp(first_arg) == dst, [&]{
- *              //...
- *          }).op_else_if(exp(first_arg) > dst, [&]{
- *              //...
- *          }).op_else([&] {
- *              //...
- *          });
- *
- *  7. For-loops. A similar syntax exists for for-loops:
- *          writer.op_for_loop(<cond>, <updater>, <body>);
- *
- *     For example:
- *          writer.op_for_loop(index < limit, index += step, [&]{
- *              //...
- *          });
- *
- * NOTE: There are limitations on the for-loop <cond> and <updater> parameters.
- * In neither the <cond> (Binary expression) or <updater> (Increment/Decrement)
- * is it allowed to use nesting. For example, `(index + other) < limit` and
- * `index < round(limit)` are invalid <cond> parameters. This is because the
- * semantics of for-loops rely on the condition being evaluated at every iteration,
- * but as temporary variables might be defined for nested expressions the semantics
- * cannot be guaranteed.
- */
-
-namespace ckw
-{
-
-// ==================================================
-// Type traits
-// ==================================================
-
-/** Specifies if the type can be used as an operand for functions (e.g. max), operations (e.g. *), or assignments. */
-template <typename T>
-struct can_be_operand : ::std::false_type
-{
-};
-
-/** Specifies if the type can be assigned/written to. */
-template <typename T>
-struct can_be_assigned : ::std::false_type
-{
-};
-
-template <>
-struct can_be_operand<TileOperand &> : ::std::true_type
-{
-};
-
-template <>
-struct can_be_assigned<TileOperand &> : ::std::true_type
-{
-};
-
-// ==================================================
-// Assignment
-// ==================================================
-
-/** AST node for assignments.
- *
- * Note that \p TRight must be an operand, and \p TLeft must be assignable.
- *
- * @tparam TLeft The type of the destination of the assignment.
- * @tparam TRight The type of the source assigned to the destination.
- */
-template <typename TLeft,
-          typename TRight,
-          typename = ::std::enable_if<can_be_operand<TRight>::value && can_be_assigned<TLeft>::value>>
-struct Assignment
-{
-    TLeft        lhs;
-    TRight       rhs;
-    AssignmentOp opcode;
-};
-
-/** Represents the expression: `\p lhs += \p rhs`.
- *
- * @tparam      TLeft    The type of the LHS of the assignment.
- * @tparam      TRight   The type of the RHS of the assignment.
- * @param[in]   lhs      The LHS of the assignment.
- * @param[in]   rhs      The RHS of the assignment.
- * @return      The resulting AST node.
- */
-template <typename TLeft, typename TRight>
-inline Assignment<TLeft, TRight> operator+=(TLeft &&lhs, TRight &&rhs)
-{
-    return Assignment<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), AssignmentOp::Increment};
-}
-
-/** Represents the expression: `\p lhs -= \p rhs`.
- *
- * @tparam      TLeft    The type of the LHS of the assignment.
- * @tparam      TRight   The type of the RHS of the assignment.
- * @param[in]   lhs    The LHS of the assignment.
- * @param[in]   rhs    The RHS of the assignment.
- * @return      The resulting AST node.
- */
-template <typename TLeft, typename TRight>
-inline Assignment<TLeft, TRight> operator-=(TLeft &&lhs, TRight &&rhs)
-{
-    return Assignment<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), AssignmentOp::Decrement};
-}
-
-// ==================================================
-// Unary expression
-// ==================================================
-
-/** AST node for unary expressions.
- *
- * Note that \p TSrc must be an operand.
- *
- * @tparam TSrc The type of the argument to the expression.
- */
-template <typename TSrc, typename = ::std::enable_if<can_be_operand<TSrc>::value>>
-struct UnaryExpression
-{
-    TSrc    src;
-    UnaryOp opcode;
-};
-
-template <typename TLeft>
-struct can_be_operand<UnaryExpression<TLeft>> : ::std::true_type
-{
-};
-
-/** Represents the expression: `!\p src`.
- *
- * @tparam      TSrc The type of the argument.
- * @param[in]   src  The argument.
- * @return      The resulting AST node.
- */
-template <typename TSrc>
-inline UnaryExpression<TSrc> operator!(TSrc &&src)
-{
-    return UnaryExpression<TSrc>{std::forward<TSrc>(src), UnaryOp::LogicalNot};
-}
-
-/** Represents the expression: `~\p src`.
- *
- * @tparam      TSrc The type of the argument.
- * @param[in]   src  The argument.
- * @return      The resulting AST node.
- */
-template <typename TSrc>
-inline UnaryExpression<TSrc> operator~(TSrc &&src)
-{
-    return UnaryExpression<TSrc>{std::forward<TSrc>(src), UnaryOp::BitwiseNot};
-}
-
-// ==================================================
-// Binary expressions
-// ==================================================
-
-/** AST node for binary expressions.
- *
- * Note that both \p TLeft and \p TRight must be operands.
- *
- * @tparam TLeft  The type of the left argument of the expression.
- * @tparam TRight The type of the right argument of the expression.
- */
-template <typename TLeft,
-          typename TRight,
-          typename = ::std::enable_if_t<can_be_operand<TLeft>::value && can_be_operand<TRight>::value>>
-struct BinaryExpression
-{
-    TLeft    lhs;
-    TRight   rhs;
-    BinaryOp opcode;
-};
-
-template <typename TLeft, typename TRight>
-struct can_be_operand<BinaryExpression<TLeft, TRight>> : ::std::true_type
-{
-};
-
-/** Represents the expression: `\p lhs + \p rhs`.
- *
- * @tparam     TLeft  The type of the LHS of the expression.
- * @tparam     TRight The type of the RHS of the expression.
- * @param[in]  lhs    The LHS of the expression.
- * @param[in]  rhs    The RHS of the expression.
- * @return     The resulting AST node.
- */
-template <typename TLeft, typename TRight>
-inline BinaryExpression<TLeft, TRight> operator+(TLeft &&lhs, TRight &&rhs)
-{
-    return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::Add};
-}
-
-/** Represents the expression: `\p lhs - \p rhs`.
- *
- * @tparam     TLeft  The type of the LHS of the expression.
- * @tparam     TRight The type of the RHS of the expression.
- * @param[in]  lhs    The LHS of the expression.
- * @param[in]  rhs    The RHS of the expression.
- * @return     The resulting AST node.
- */
-template <typename TLeft, typename TRight>
-inline BinaryExpression<TLeft, TRight> operator-(TLeft &&lhs, TRight &&rhs)
-{
-    return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::Sub};
-}
-
-/** Represents the expression: `\p lhs * \p rhs`.
- *
- * @tparam     TLeft  The type of the LHS of the expression.
- * @tparam     TRight The type of the RHS of the expression.
- * @param[in]  lhs    The LHS of the expression.
- * @param[in]  rhs    The RHS of the expression.
- * @return     The resulting AST node.
- */
-template <typename TLeft, typename TRight>
-inline BinaryExpression<TLeft, TRight> operator*(TLeft &&lhs, TRight &&rhs)
-{
-    return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::Mul};
-}
-
-/** Represents the expression: `\p lhs / \p rhs`.
- *
- * @tparam     TLeft  The type of the LHS of the expression.
- * @tparam     TRight The type of the RHS of the expression.
- * @param[in]  lhs    The LHS of the expression.
- * @param[in]  rhs    The RHS of the expression.
- * @return     The resulting AST node.
- */
-template <typename TLeft, typename TRight>
-inline BinaryExpression<TLeft, TRight> operator/(TLeft &&lhs, TRight &&rhs)
-{
-    return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::Div};
-}
-
-/** Represents the expression: `\p lhs % \p rhs`.
- *
- * @tparam     TLeft  The type of the LHS of the expression.
- * @tparam     TRight The type of the RHS of the expression.
- * @param[in]  lhs    The LHS of the expression.
- * @param[in]  rhs    The RHS of the expression.
- * @return     The resulting AST node.
- */
-template <typename TLeft, typename TRight>
-inline BinaryExpression<TLeft, TRight> operator%(TLeft &&lhs, TRight &&rhs)
-{
-    return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::Mod};
-}
-
-/** Represents the expression: `\p lhs == \p rhs`.
- *
- * @tparam     TLeft  The type of the LHS of the expression.
- * @tparam     TRight The type of the RHS of the expression.
- * @param[in]  lhs    The LHS of the expression.
- * @param[in]  rhs    The RHS of the expression.
- * @return     The resulting AST node.
- */
-template <typename TLeft, typename TRight>
-inline BinaryExpression<TLeft, TRight> operator==(TLeft &&lhs, TRight &&rhs)
-{
-    return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::Equal};
-}
-
-/** Represents the expression: `\p lhs < \p rhs`.
- *
- * @tparam     TLeft  The type of the LHS of the expression.
- * @tparam     TRight The type of the RHS of the expression.
- * @param[in]  lhs    The LHS of the expression.
- * @param[in]  rhs    The RHS of the expression.
- * @return     The resulting AST node.
- */
-template <typename TLeft, typename TRight>
-inline BinaryExpression<TLeft, TRight> operator<(TLeft &&lhs, TRight &&rhs)
-{
-    return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::Less};
-}
-
-/** Represents the expression: `\p lhs <= \p rhs`.
- *
- * @tparam     TLeft  The type of the LHS of the expression.
- * @tparam     TRight The type of the RHS of the expression.
- * @param[in]  lhs    The LHS of the expression.
- * @param[in]  rhs    The RHS of the expression.
- * @return     The resulting AST node.
- */
-template <typename TLeft, typename TRight>
-inline BinaryExpression<TLeft, TRight> operator<=(TLeft &&lhs, TRight &&rhs)
-{
-    return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::LessEqual};
-}
-
-/** Represents the expression: `\p lhs > \p rhs`.
- *
- * @tparam     TLeft  The type of the LHS of the expression.
- * @tparam     TRight The type of the RHS of the expression.
- * @param[in]  lhs    The LHS of the expression.
- * @param[in]  rhs    The RHS of the expression.
- * @return     The resulting AST node.
- */
-template <typename TLeft, typename TRight>
-inline BinaryExpression<TLeft, TRight> operator>(TLeft &&lhs, TRight &&rhs)
-{
-    return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::Greater};
-}
-
-/** Represents the expression: `\p lhs >= \p rhs`.
- *
- * @tparam     TLeft  The type of the LHS of the expression.
- * @tparam     TRight The type of the RHS of the expression.
- * @param[in]  lhs    The LHS of the expression.
- * @param[in]  rhs    The RHS of the expression.
- * @return     The resulting AST node.
- */
-template <typename TLeft, typename TRight>
-inline BinaryExpression<TLeft, TRight> operator>=(TLeft &&lhs, TRight &&rhs)
-{
-    return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::GreaterEqual};
-}
-
-/** Represents the expression: `\p lhs ^ \p rhs`.
- *
- * @tparam     TLeft  The type of the LHS of the expression.
- * @tparam     TRight The type of the RHS of the expression.
- * @param[in]  lhs    The LHS of the expression.
- * @param[in]  rhs    The RHS of the expression.
- * @return     The resulting AST node.
- */
-template <typename TLeft, typename TRight>
-inline BinaryExpression<TLeft, TRight> operator^(TLeft &&lhs, TRight &&rhs)
-{
-    return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::BitwiseXOR};
-}
-
-/** Represents the expression: `\p lhs && \p rhs`.
- *
- * @tparam     TLeft  The type of the LHS of the expression.
- * @tparam     TRight The type of the RHS of the expression.
- * @param[in]  lhs    The LHS of the expression.
- * @param[in]  rhs    The RHS of the expression.
- * @return     The resulting AST node.
- */
-template <typename TLeft, typename TRight>
-inline BinaryExpression<TLeft, TRight> logical_and(TLeft &&lhs, TRight &&rhs)
-{
-    return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::LogicalAnd};
-}
-
-/** Represents the expression: `\p lhs && \p rhs`.
- *
- * @tparam     TLeft  The type of the LHS of the expression.
- * @tparam     TRight The type of the RHS of the expression.
- * @param[in]  lhs    The LHS of the expression.
- * @param[in]  rhs    The RHS of the expression.
- * @return     The resulting AST node.
- */
-template <typename TLeft, typename TRight, typename... TOps>
-inline BinaryExpression<BinaryExpression<TLeft, TRight>, TOps...> logical_and(TLeft &&lhs, TRight &&rhs, TOps &&...ops)
-{
-    return logical_and(
-        BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::LogicalAnd},
-        std::forward<TOps>(ops)...);
-}
-
-/** Represents the expression: `\p lhs || \p rhs`.
- *
- * @tparam     TLeft  The type of the LHS of the expression.
- * @tparam     TRight The type of the RHS of the expression.
- * @param[in]  lhs    The LHS of the expression.
- * @param[in]  rhs    The RHS of the expression.
- * @return     The resulting AST node.
- */
-template <typename TLeft, typename TRight>
-inline BinaryExpression<TLeft, TRight> logical_or(TLeft &&lhs, TRight &&rhs)
-{
-    return BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::LogicalOr};
-}
-
-/** Represents the expression: `\p lhs || \p rhs`.
- *
- * @tparam     TLeft  The type of the LHS of the expression.
- * @tparam     TRight The type of the RHS of the expression.
- * @param[in]  lhs    The LHS of the expression.
- * @param[in]  rhs    The RHS of the expression.
- * @return     The resulting AST node.
- */
-template <typename TLeft, typename TRight, typename... TOps>
-inline BinaryExpression<BinaryExpression<TLeft, TRight>, TOps...> logical_or(TLeft &&lhs, TRight &&rhs, TOps &&...ops)
-{
-    return logical_or(
-        BinaryExpression<TLeft, TRight>{std::forward<TLeft>(lhs), std::forward<TRight>(rhs), BinaryOp::LogicalOr},
-        std::forward<TOps>(ops)...);
-}
-
-// ==================================================
-// Unary elementwise functions
-// ==================================================
-
-/** AST node for unary elementwise functions.
- *
- * Note that \p TSrc must be an operand.
- *
- * @tparam TSrc The type of the argument to the function.
- */
-template <typename TSrc, typename = ::std::enable_if<can_be_operand<TSrc>::value>>
-struct UnaryElementwiseFunction
-{
-    TSrc          src;
-    UnaryFunction opcode;
-};
-
-template <typename TLeft>
-struct can_be_operand<UnaryElementwiseFunction<TLeft>> : ::std::true_type
-{
-};
-
-/** Represents the expression: `exp(\p src)`.
- *
- * @tparam      TSrc The type of the argument.
- * @param[in]   src  The argument.
- * @return      The resulting AST node.
- */
-template <typename TSrc>
-UnaryElementwiseFunction<TSrc> exp(TSrc &&src)
-{
-    return UnaryElementwiseFunction<TSrc>{std::forward<TSrc>(src), UnaryFunction::Exp};
-}
-
-/** Represents the expression: `tanh(\p src)`.
- *
- * @tparam      TSrc The type of the argument.
- * @param[in]   src  The argument.
- * @return      The resulting AST node.
- */
-template <typename TSrc>
-UnaryElementwiseFunction<TSrc> tanh(TSrc &&src)
-{
-    return UnaryElementwiseFunction<TSrc>{std::forward<TSrc>(src), UnaryFunction::Tanh};
-}
-
-/** Represents the expression: `sqrt(\p src)`.
- *
- * @tparam      TSrc The type of the argument.
- * @param[in]   src  The argument.
- * @return      The resulting AST node.
- */
-template <typename TSrc>
-UnaryElementwiseFunction<TSrc> sqrt(TSrc &&src)
-{
-    return UnaryElementwiseFunction<TSrc>{std::forward<TSrc>(src), UnaryFunction::Sqrt};
-}
-
-/** Represents the expression: `erf(\p src)`.
- *
- * @tparam      TSrc The type of the argument.
- * @param[in]   src  The argument.
- * @return      The resulting AST node.
- */
-template <typename TSrc>
-UnaryElementwiseFunction<TSrc> erf(TSrc &&src)
-{
-    return UnaryElementwiseFunction<TSrc>{std::forward<TSrc>(src), UnaryFunction::Erf};
-}
-
-/** Represents the expression: `fabs(\p src)`.
- *
- * @tparam      TSrc The type of the argument.
- * @param[in]   src  The argument.
- * @return      The resulting AST node.
- */
-template <typename TSrc>
-UnaryElementwiseFunction<TSrc> fabs(TSrc &&src)
-{
-    return UnaryElementwiseFunction<TSrc>{std::forward<TSrc>(src), UnaryFunction::Fabs};
-}
-
-/** Represents the expression: `log(\p src)`.
- *
- * @tparam      TSrc The type of the argument.
- * @param[in]   src  The argument.
- * @return      The resulting AST node.
- */
-template <typename TSrc>
-UnaryElementwiseFunction<TSrc> log(TSrc &&src)
-{
-    return UnaryElementwiseFunction<TSrc>{std::forward<TSrc>(src), UnaryFunction::Log};
-}
-
-/** Represents the expression: `round(\p src)`.
- *
- * @tparam      TSrc The type of the argument.
- * @param[in]   src  The argument.
- * @return      The resulting AST node.
- */
-template <typename TSrc>
-UnaryElementwiseFunction<TSrc> round(TSrc &&src)
-{
-    return UnaryElementwiseFunction<TSrc>{std::forward<TSrc>(src), UnaryFunction::Round};
-}
-
-/** Represents the expression: `sizeof(\p src)`.
- *
- * @tparam      TSrc The type of the argument.
- * @param[in]   src  The argument.
- * @return      The resulting AST node.
- */
-template <typename TSrc>
-UnaryElementwiseFunction<TSrc> sizeOf(TSrc &&src)
-{
-    return UnaryElementwiseFunction<TSrc>{std::forward<TSrc>(src), UnaryFunction::SizeOf};
-}
-
-// ==================================================
-// Binary elementwise functions
-// ==================================================
-
-/** AST node for binary elementwise functions.
- *
- * Note that both \p TFirst and \p TSecond must be operands.
- *
- * @tparam TFirst  The type of the left argument of the function.
- * @tparam TSecond The type of the right argument of the function.
- */
-template <typename TFirst,
-          typename TSecond,
-          typename = ::std::enable_if<can_be_operand<TFirst>::value && can_be_operand<TSecond>::value>>
-struct BinaryElementwiseFunction
-{
-    TFirst         first;
-    TSecond        second;
-    BinaryFunction opcode;
-};
-
-template <typename TFirst, typename TSecond>
-struct can_be_operand<BinaryElementwiseFunction<TFirst, TSecond>> : ::std::true_type
-{
-};
-
-/** Represents the function call: `max(\p first, \p second)`.
- *
- * @tparam      TFirst  The type of the first argument.
- * @tparam      TSecond The type of the second argument.
- * @param[in]   first   The first argument.
- * @param[in]   second  The second argument.
- * @return      The resulting AST node.
- */
-template <typename TFirst, typename TSecond>
-BinaryElementwiseFunction<TFirst, TSecond> max(TFirst &&first, TSecond &&second)
-{
-    return BinaryElementwiseFunction<TFirst, TSecond>{std::forward<TFirst>(first), std::forward<TSecond>(second),
-                                                      BinaryFunction::Max};
-}
-
-/** Represents the function call: `min(\p first, \p second)`.
- *
- * @tparam      TFirst  The type of the first argument.
- * @tparam      TSecond The type of the second argument.
- * @param[in]   first   The first argument.
- * @param[in]   second  The second argument.
- * @return      The resulting AST node.
- */
-template <typename TFirst, typename TSecond>
-BinaryElementwiseFunction<TFirst, TSecond> min(TFirst &&first, TSecond &&second)
-{
-    return BinaryElementwiseFunction<TFirst, TSecond>{std::forward<TFirst>(first), std::forward<TSecond>(second),
-                                                      BinaryFunction::Min};
-}
-
-// ==================================================
-// Ternary elementwise functions
-// ==================================================
-
-/** AST node for ternary elementwise functions.
- *
- * Note that \p TFirst, \p TSecond, and \p TThird all must be operands.
- *
- * @tparam TFirst The type of the first argument to the function.
- * @tparam TSecond The type of the second argument to the function.
- * @tparam TThird The type of the third argument to the function.
- */
-template <typename TFirst,
-          typename TSecond,
-          typename TThird,
-          typename = ::std::enable_if<can_be_operand<TFirst>::value && can_be_operand<TSecond>::value &&
-                                      can_be_operand<TThird>::value>>
-struct TernaryElementwiseFunction
-{
-    TFirst          first;
-    TSecond         second;
-    TThird          third;
-    TernaryFunction opcode;
-};
-
-template <typename TFirst, typename TSecond, typename TThird>
-struct can_be_operand<TernaryElementwiseFunction<TFirst, TSecond, TThird>> : ::std::true_type
-{
-};
-
-/** Represents the function call: `select(\p first, \p second, \p third)`.
- *
- * @tparam      TFirst  The type of the first argument.
- * @tparam      TSecond The type of the second argument.
- * @tparam      TThird  The type of the third argument.
- * @param[in]   first   The first argument.
- * @param[in]   second  The second argument.
- * @param[in]   third   The third argument.
- * @return      The resulting AST node.
- */
-template <typename TFirst, typename TSecond, typename TThird>
-TernaryElementwiseFunction<TFirst, TSecond, TThird> select(TFirst &&first, TSecond &&second, TThird &&third)
-{
-    return TernaryElementwiseFunction<TFirst, TSecond, TThird>{std::forward<TFirst>(first),
-                                                               std::forward<TSecond>(second),
-                                                               std::forward<TThird>(third), TernaryFunction::Select};
-}
-
-/** Helper class used to extend a KernelWriter with additional functionality
- * in order to make writing easier.
- *
- * This extension automatically handles creation of temporary variables, and
- * allows nested function calls and operations.
- *
- * @tparam TWriter The type of KernelWriter to be overloaded. This must inherit from KernelWriter.
- */
-template <class TWriter, typename = std::enable_if<std::is_base_of<KernelWriter, TWriter>::value>>
-class KernelWriterHelper : public TWriter
-{
-public:
-    using TWriter::TWriter;
-
-    // ==================================================
-    // If-statements
-    // ==================================================
-
-    // Un-hide original implementation, in case the original implementation is required.
-    using TWriter::op_if;
-
-    /** Represents the if-statement: `if(\p cond) { \p body }`.
-     *
-     * The BinaryExpression is unpacked and its components are forwarded to
-     * the underlying KernelWriter's implementation.
-     *
-     * @param[in] cond The BinaryExpression representing the condition.
-     * @param[in] body The body of the if-statement.
-     */
-    KernelWriterHelper<TWriter> &op_if(const BinaryExpression<TileOperand &, TileOperand &> &cond,
-                                       const std::function<void()>                          &body)
-    {
-        TWriter::op_if(cond.lhs, cond.opcode, cond.rhs, body);
-        return *this;
-    }
-
-    /** Represents the if-statement: `if(\p cond) { \p body }`.
-     *
-     * The BinaryExpression is unpacked and its components are forwarded to
-     * the underlying KernelWriter's implementation.
-     *
-     * @param[in] cond The BinaryExpression representing the condition.
-     * @param[in] body The body of the if-statement.
-     */
-    template <typename TRight>
-    KernelWriterHelper<TWriter> &op_if(const BinaryExpression<TileOperand &, TRight> &cond,
-                                       const std::function<void()>                   &body)
-    {
-        auto &tmp1 = declare_temp_tile(cond.lhs.tile_info());
-        op_assign(tmp1, cond.rhs);
-        TWriter::op_if(cond.lhs, cond.opcode, tmp1, body);
-        return *this;
-    }
-
-    /** Represents the if-statement: `if(\p cond) { \p body }`.
-     *
-     * The BinaryExpression is unpacked and its components are forwarded to
-     * the underlying KernelWriter's implementation.
-     *
-     * @param[in] cond The BinaryExpression representing the condition.
-     * @param[in] body The body of the if-statement.
-     */
-    template <typename TLeft>
-    KernelWriterHelper<TWriter> &op_if(const BinaryExpression<TLeft, TileOperand &> &cond,
-                                       const std::function<void()>                  &body)
-    {
-        auto &tmp1 = declare_temp_tile(cond.rhs.tile_info());
-        op_assign(tmp1, cond.lhs);
-        TWriter::op_if(tmp1, cond.opcode, cond.rhs, body);
-        return *this;
-    }
-
-    // Un-hide original implementation, in case the original implementation is required.
-    using TWriter::op_else_if;
-
-    /** Represents the else-if-statement: `else if(\p cond) { \p body }`.
-     *
-     * The BinaryExpression is unpacked and its components are forwarded to
-     * the underlying KernelWriter's implementation.
-     *
-     * @param[in] cond The BinaryExpression representing the condition.
-     * @param[in] body The body of the else-if-statement.
-     */
-    KernelWriterHelper<TWriter> &op_else_if(const BinaryExpression<TileOperand &, TileOperand &> &cond,
-                                            const std::function<void()>                          &body)
-    {
-        TWriter::op_else_if(cond.lhs, cond.opcode, cond.rhs, body);
-        return *this;
-    }
-
-    /** Represents the else-if-statement: `else if(\p cond) { \p body }`.
-     *
-     * The BinaryExpression is unpacked and its components are forwarded to
-     * the underlying KernelWriter's implementation.
-     *
-     * @param[in] cond The BinaryExpression representing the condition.
-     * @param[in] body The body of the else-if-statement.
-     */
-    template <typename TRight>
-    KernelWriterHelper<TWriter> &op_else_if(const BinaryExpression<TileOperand &, TRight> &cond,
-                                            const std::function<void()>                   &body)
-    {
-        auto &tmp1 = declare_temp_tile(cond.lhs.tile_info());
-        op_assign(tmp1, cond.rhs);
-        TWriter::op_else_if(cond.lhs, cond.opcode, tmp1, body);
-        return *this;
-    }
-
-    /** Represents the else-if-statement: `else if(\p cond) { \p body }`.
-     *
-     * The BinaryExpression is unpacked and its components are forwarded to
-     * the underlying KernelWriter's implementation.
-     *
-     * @param[in] cond The BinaryExpression representing the condition.
-     * @param[in] body The body of the else-if-statement.
-     */
-    template <typename TLeft>
-    KernelWriterHelper<TWriter> &op_else_if(const BinaryExpression<TLeft, TileOperand &> &cond,
-                                            const std::function<void()>                  &body)
-    {
-        auto &tmp1 = declare_temp_tile(cond.rhs.tile_info());
-        op_assign(tmp1, cond.lhs);
-        TWriter::op_else_if(tmp1, cond.opcode, cond.rhs, body);
-        return *this;
-    }
-
-    // ==================================================
-    // For-loops
-    // ==================================================
-
-    // Un-hide original implementation, in case the original implementation is required.
-    using TWriter::op_for_loop;
-
-    /** Represents the for-loop: `for(;\p cond; \p updater) { \p body }`.
-     *
-     * The BinaryExpression for the condition and the Assignment
-     * for the updater are unpacked and their components are forwarded to
-     * the underlying KernelWriter's implementation.
-     *
-     * @param[in] cond    The BinaryExpression representing the condition.
-     * @param[in] updater The Assignment representing the updater.
-     * @param[in] body    The body of the for-loop.
-     */
-    void op_for_loop(const BinaryExpression<TileOperand &, TileOperand &> &cond,
-                     const Assignment<TileOperand &, TileOperand &>       &updater,
-                     const std::function<void()>                          &body)
-    {
-        TWriter::op_for_loop(cond.lhs, cond.opcode, cond.rhs, updater.lhs, updater.opcode, updater.rhs, body);
-    }
-
-    // ==================================================
-    // Unary expressions
-    // ==================================================
-
-    // Un-hide original implementation, in case the original implementation is required.
-    using TWriter::op_assign;
-
-    /** Represents the assignment: `\p dst = \p exp`.
-     *
-     * The UnaryExpression is unpacked and its components are forwarded to
-     * the underlying KernelWriter's implementation.
-     *
-     * @param[in] dst The tile which is assigned to.
-     * @param[in] exp The UnaryExpression representing the expression to be evaluated and assigned.
-     */
-    void op_assign(const TileOperand &dst, const UnaryExpression<TileOperand &> &exp)
-    {
-        TWriter::op_unary_expression(dst, exp.opcode, exp.src);
-    }
-
-    // ==================================================
-    // Binary expressions
-    // ==================================================
-
-    /** Represents the assignment: `\p dst = \p exp`.
-     *
-     * The BinaryExpression is unpacked and its components are forwarded to
-     * the underlying KernelWriter's implementation.
-     *
-     * @param[in] dst The tile which is assigned to.
-     * @param[in] exp The BinaryExpression representing the expression to be evaluated and assigned.
-     */
-    void op_assign(const TileOperand &dst, const BinaryExpression<TileOperand &, TileOperand &> &exp)
-    {
-        TWriter::op_binary_expression(dst, exp.lhs, exp.opcode, exp.rhs);
-    }
-
-    /** Represents the assignment: `\p dst = \p exp`.
-     *
-     * The BinaryExpression is unpacked and its components are forwarded to
-     * the underlying KernelWriter's implementation.
-     *
-     * @param[in] dst The tile which is assigned to.
-     * @param[in] exp The BinaryExpression representing the expression to be evaluated and assigned.
-     */
-    template <typename TRight>
-    void op_assign(const TileOperand &dst, const BinaryExpression<TileOperand &, TRight> &exp)
-    {
-        std::cout << "Beginning assignment!" << std::endl;
-        auto &tmp1 = declare_temp_tile(dst.tile_info());
-        op_assign(tmp1, exp.rhs);
-        TWriter::op_binary_expression(dst, exp.lhs, exp.opcode, tmp1);
-    }
-
-    /** Represents the assignment: `\p dst = \p exp`.
-     *
-     * The BinaryExpression is unpacked and its components are forwarded to
-     * the underlying KernelWriter's implementation.
-     *
-     * @param[in] dst The tile which is assigned to.
-     * @param[in] exp The BinaryExpression representing the expression to be evaluated and assigned.
-     */
-    template <typename TLeft>
-    void op_assign(const TileOperand &dst, const BinaryExpression<TLeft, TileOperand &> &exp)
-    {
-        std::cout << "Beginning assignment!" << std::endl;
-        auto &tmp1 = declare_temp_tile(dst.tile_info());
-        op_assign(tmp1, exp.lhs);
-        TWriter::op_binary_expression(dst, tmp1, exp.opcode, exp.rhs);
-    }
-
-    /** Represents the assignment: `\p dst = \p exp`.
-     *
-     * The BinaryExpression is unpacked and its components are forwarded to
-     * the underlying KernelWriter's implementation.
-     *
-     * @param[in] dst The tile which is assigned to.
-     * @param[in] exp The BinaryExpression representing the expression to be evaluated and assigned.
-     */
-    template <typename TLeft, typename TRight>
-    void op_assign(const TileOperand &dst, const BinaryExpression<TLeft, TRight> &exp)
-    {
-        auto &tmp1 = declare_temp_tile(dst.tile_info());
-        auto &tmp2 = declare_temp_tile(dst.tile_info());
-        op_assign(tmp1, exp.lhs);
-        op_assign(tmp2, exp.rhs);
-        TWriter::op_binary_expression(dst, tmp1, exp.opcode, tmp2);
-    }
-
-    // ==================================================
-    // Unary elementwise functions
-    // ==================================================
-
-    /** Represents the assignment: `\p dst = \p exp`.
-     *
-     * The UnaryElementwiseFunction is unpacked and its components are forwarded to
-     * the underlying KernelWriter's implementation.
-     *
-     * @param[in] dst The tile which is assigned to.
-     * @param[in] exp The UnaryElementwiseFunction representing the expression to be evaluated and assigned.
-     */
-    void op_assign(const TileOperand &dst, const UnaryElementwiseFunction<TileOperand &> &exp)
-    {
-        TWriter::op_unary_elementwise_function(dst, exp.opcode, exp.src);
-    }
-
-    /** Represents the assignment: `\p dst = \p exp`.
-     *
-     * The UnaryElementwiseFunction is unpacked and its components are forwarded to
-     * the underlying KernelWriter's implementation.
-     *
-     * @param[in] dst The tile which is assigned to.
-     * @param[in] exp The UnaryElementwiseFunction representing the expression to be evaluated and assigned.
-     */
-    template <typename TArg>
-    void op_assign(const TileOperand &dst, const UnaryElementwiseFunction<TArg> &exp)
-    {
-        auto &tmp1 = declare_temp_tile(dst.tile_info());
-        op_assign(tmp1, exp.lhs);
-        TWriter::op_unary_elementwise_function(dst, exp.opcode, tmp1);
-    }
-
-    // ==================================================
-    // Binary elementwise functions
-    // ==================================================
-
-    /** Represents the assignment: `\p dst = \p exp`.
-     *
-     * The BinaryElementwiseFunction is unpacked and its components are forwarded to
-     * the underlying KernelWriter's implementation.
-     *
-     * @param[in] dst The tile which is assigned to.
-     * @param[in] exp The BinaryElementwiseFunction representing the expression to be evaluated and assigned.
-     */
-    void op_assign(const TileOperand &dst, const BinaryElementwiseFunction<TileOperand &, TileOperand &> &exp)
-    {
-        TWriter::op_binary_elementwise_function(dst, exp.opcode, exp.first, exp.second);
-    }
-
-    /** Represents the assignment: `\p dst = \p exp`.
-     *
-     * The BinaryElementwiseFunction is unpacked and its components are forwarded to
-     * the underlying KernelWriter's implementation.
-     *
-     * @param[in] dst The tile which is assigned to.
-     * @param[in] exp The BinaryElementwiseFunction representing the expression to be evaluated and assigned.
-     */
-    template <typename TRight>
-    void op_assign(const TileOperand &dst, const BinaryElementwiseFunction<TileOperand &, TRight> &exp)
-    {
-        auto &tmp1 = declare_temp_tile(dst.tile_info());
-        op_assign(tmp1, exp.second);
-        TWriter::op_binary_elementwise_function(dst, exp.opcode, exp.first, tmp1);
-    }
-
-    /** Represents the assignment: `\p dst = \p exp`.
-     *
-     * The BinaryElementwiseFunction is unpacked and its components are forwarded to
-     * the underlying KernelWriter's implementation.
-     *
-     * @param[in] dst The tile which is assigned to.
-     * @param[in] exp The BinaryElementwiseFunction representing the expression to be evaluated and assigned.
-     */
-    template <typename TLeft>
-    void op_assign(const TileOperand &dst, const BinaryElementwiseFunction<TLeft, TileOperand &> &exp)
-    {
-        auto &tmp1 = declare_temp_tile(dst.tile_info());
-        op_assign(tmp1, exp.first);
-        TWriter::op_binary_elementwise_function(dst, exp.opcode, tmp1, exp.second);
-    }
-
-    /** Represents the assignment: `\p dst = \p exp`.
-     *
-     * The BinaryElementwiseFunction is unpacked and its components are forwarded to
-     * the underlying KernelWriter's implementation.
-     *
-     * @param[in] dst The tile which is assigned to.
-     * @param[in] exp The BinaryElementwiseFunction representing the expression to be evaluated and assigned.
-     */
-    template <typename TLeft, typename TRight>
-    void op_assign(const TileOperand &dst, const BinaryElementwiseFunction<TLeft, TRight> &exp)
-    {
-        auto &tmp1 = declare_temp_tile(dst.tile_info());
-        auto &tmp2 = declare_temp_tile(dst.tile_info());
-        op_assign(tmp1, exp.first);
-        op_assign(tmp2, exp.second);
-        TWriter::op_binary_elementwise_function(dst, exp.opcode, tmp1, tmp2);
-    }
-
-    // ==================================================
-    // Ternary elementwise functions
-    // ==================================================
-
-    /** Represents the assignment: `\p dst = \p exp`.
-     *
-     * The TernaryElementwiseFunction is unpacked and its components are forwarded to
-     * the underlying KernelWriter's implementation.
-     *
-     * @param[in] dst The tile which is assigned to.
-     * @param[in] exp The TernaryElementwiseFunction representing the expression to be evaluated and assigned.
-     */
-    void op_assign(const TileOperand                                                             &dst,
-                   const TernaryElementwiseFunction<TileOperand &, TileOperand &, TileOperand &> &exp)
-    {
-        TWriter::op_ternary_elementwise_function(dst, exp.opcode, exp.first, exp.second, exp.third);
-    }
-
-    /** Represents the assignment: `\p dst = \p exp`.
-     *
-     * The TernaryElementwiseFunction is unpacked and its components are forwarded to
-     * the underlying KernelWriter's implementation.
-     *
-     * @param[in] dst The tile which is assigned to.
-     * @param[in] exp The TernaryElementwiseFunction representing the expression to be evaluated and assigned.
-     */
-    template <typename TFirst>
-    void op_assign(const TileOperand &dst, const TernaryElementwiseFunction<TFirst, TileOperand &, TileOperand &> &exp)
-    {
-        auto &tmp1 = declare_temp_tile(dst.tile_info());
-        op_assign(tmp1, exp.first);
-        TWriter::op_ternary_elementwise_function(dst, exp.opcode, tmp1, exp.second, exp.third);
-    }
-
-    /** Represents the assignment: `\p dst = \p exp`.
-     *
-     * The TernaryElementwiseFunction is unpacked and its components are forwarded to
-     * the underlying KernelWriter's implementation.
-     *
-     * @param[in] dst The tile which is assigned to.
-     * @param[in] exp The TernaryElementwiseFunction representing the expression to be evaluated and assigned.
-     */
-    template <typename TSecond>
-    void op_assign(const TileOperand &dst, const TernaryElementwiseFunction<TileOperand &, TSecond, TileOperand &> &exp)
-    {
-        auto &tmp1 = declare_temp_tile(dst.tile_info());
-        op_assign(tmp1, exp.second);
-        TWriter::op_ternary_elementwise_function(dst, exp.opcode, exp.first, tmp1, exp.third);
-    }
-
-    /** Represents the assignment: `\p dst = \p exp`.
-     *
-     * The TernaryElementwiseFunction is unpacked and its components are forwarded to
-     * the underlying KernelWriter's implementation.
-     *
-     * @param[in] dst The tile which is assigned to.
-     * @param[in] exp The TernaryElementwiseFunction representing the expression to be evaluated and assigned.
-     */
-    template <typename TThird>
-    void op_assign(const TileOperand &dst, const TernaryElementwiseFunction<TileOperand &, TileOperand &, TThird> &exp)
-    {
-        auto &tmp1 = declare_temp_tile(dst.tile_info());
-        op_assign(tmp1, exp.third);
-        TWriter::op_ternary_elementwise_function(dst, exp.opcode, exp.first, exp.second, tmp1);
-    }
-
-    /** Represents the assignment: `\p dst = \p exp`.
-     *
-     * The TernaryElementwiseFunction is unpacked and its components are forwarded to
-     * the underlying KernelWriter's implementation.
-     *
-     * @param[in] dst The tile which is assigned to.
-     * @param[in] exp The TernaryElementwiseFunction representing the expression to be evaluated and assigned.
-     */
-    template <typename TFirst, typename TSecond>
-    void op_assign(const TileOperand &dst, const TernaryElementwiseFunction<TFirst, TSecond, TileOperand &> &exp)
-    {
-        auto &tmp1 = declare_temp_tile(dst.tile_info());
-        auto &tmp2 = declare_temp_tile(dst.tile_info());
-        op_assign(tmp1, exp.first);
-        op_assign(tmp2, exp.second);
-        TWriter::op_ternary_elementwise_function(dst, exp.opcode, tmp1, tmp2, exp.third);
-    }
-
-    /** Represents the assignment: `\p dst = \p exp`.
-     *
-     * The TernaryElementwiseFunction is unpacked and its components are forwarded to
-     * the underlying KernelWriter's implementation.
-     *
-     * @param[in] dst The tile which is assigned to.
-     * @param[in] exp The TernaryElementwiseFunction representing the expression to be evaluated and assigned.
-     */
-    template <typename TFirst, typename TThird>
-    void op_assign(const TileOperand &dst, const TernaryElementwiseFunction<TFirst, TileOperand &, TThird> &exp)
-    {
-        auto &tmp1 = declare_temp_tile(dst.tile_info());
-        auto &tmp2 = declare_temp_tile(dst.tile_info());
-        op_assign(tmp1, exp.first);
-        op_assign(tmp2, exp.third);
-        TWriter::op_ternary_elementwise_function(dst, exp.opcode, tmp1, exp.second, tmp2);
-    }
-
-    /** Represents the assignment: `\p dst = \p exp`.
-     *
-     * The TernaryElementwiseFunction is unpacked and its components are forwarded to
-     * the underlying KernelWriter's implementation.
-     *
-     * @param[in] dst The tile which is assigned to.
-     * @param[in] exp The TernaryElementwiseFunction representing the expression to be evaluated and assigned.
-     */
-    template <typename TSecond, typename TThird>
-    void op_assign(const TileOperand &dst, const TernaryElementwiseFunction<TileOperand &, TSecond, TThird> &exp)
-    {
-        auto &tmp1 = declare_temp_tile(dst.tile_info());
-        auto &tmp2 = declare_temp_tile(dst.tile_info());
-        op_assign(tmp1, exp.second);
-        op_assign(tmp2, exp.third);
-        TWriter::op_ternary_elementwise_function(dst, exp.opcode, exp.first, tmp1, tmp2);
-    }
-
-    /** Represents the assignment: `\p dst = \p exp`.
-     *
-     * The TernaryElementwiseFunction is unpacked and its components are forwarded to
-     * the underlying KernelWriter's implementation.
-     *
-     * @param[in] dst The tile which is assigned to.
-     * @param[in] exp The TernaryElementwiseFunction representing the expression to be evaluated and assigned.
-     */
-    template <typename TFirst, typename TSecond, typename TThird>
-    void op_assign(const TileOperand &dst, const TernaryElementwiseFunction<TFirst, TSecond, TThird> &exp)
-    {
-        auto &tmp1 = declare_temp_tile(dst.tile_info(), dst.tile_info(), dst.tile_info());
-        auto &tmp2 = declare_temp_tile(dst.tile_info());
-        auto &tmp3 = declare_temp_tile(dst.tile_info());
-        op_assign(tmp1, exp.first);
-        op_assign(tmp2, exp.second);
-        op_assign(tmp3, exp.third);
-        TWriter::op_ternary_elementwise_function(dst, exp.opcode, tmp1, tmp2, tmp3);
-    }
-
-    // ==================================================
-    // Assignments
-    // ==================================================
-
-    /** Represents the assignment: `\p lhs += \p rhs` or `\p lhs -= \p rhs`.
-     *
-     * The Assignment is unpacked and its components are forwarded to
-     * the underlying KernelWriter's implementation.
-     *
-     * @param[in] exp The Assignment representing the expression to be evaluated.
-     */
-    void op_assign(const Assignment<TileOperand &, TileOperand &> &exp)
-    {
-        if (exp.opcode == AssignmentOp::Increment)
-        {
-            TWriter::op_binary_expression(exp.lhs, exp.lhs, BinaryOp::Add, exp.rhs);
-        }
-        else if (exp.opcode == AssignmentOp::Decrement)
-        {
-            TWriter::op_binary_expression(exp.lhs, exp.lhs, BinaryOp::Sub, exp.rhs);
-        }
-    }
-
-    /** Represents the assignment: `\p lhs += \p rhs` or `\p lhs -= \p rhs`.
-     *
-     * The Assignment is unpacked and its components are forwarded to
-     * the underlying KernelWriter's implementation.
-     *
-     * @tparam    TRight The type of the RHS of the assignment.
-     * @param[in] exp    The Assignment representing the expression to be evaluated.
-     */
-    template <typename TRight>
-    void op_assign(const Assignment<TileOperand &, TRight> &exp)
-    {
-        auto &tmp1 = declare_temp_tile(exp.lhs.tile_info());
-        op_assign(tmp1, exp.rhs);
-        op_assign(Assignment<TileOperand &, TileOperand &>{exp.lhs, tmp1, exp.opcode});
-    }
-
-private:
-    unsigned int temp_var_counter = 0;
-
-    /** Return the current counter value, then increment it.
-     *
-     * @return The current counter value.
-     */
-    int next_ctr()
-    {
-        return temp_var_counter++;
-    }
-
-    /** Gets the next temporary variable counter value,
-     * and returns a suitable temporary variable name.
-     *
-     * @return A temporary variable name.
-     */
-    std::string next_tmp_var_name()
-    {
-        return "tmp_" + std::to_string(next_ctr());
-    }
-
-    /** Returns the argument.
-     *
-     * Used for recursion with the variadic function version of this function.
-     *
-     * @param[in] arg The TileInfo to return.
-     * @return    The \p arg.
-     */
-    TileInfo get_largest_size(const TileInfo &arg)
-    {
-        return arg;
-    }
-
-    /** Returns a TileInfo object where the size in each dimension (width, height) is the largest
-     * of either TileInfo argument in the corresponding dimension.
-     *
-     * @tparam    TOps   Must be of TileInfo type.
-     * @param[in] first  A TileInfo object.
-     * @param[in] second A TileInfo object.
-     * @param[in] ops    A number of TileInfo objects.
-     * @return    A TileInfo object which represents the largest shape in each dimension across the arguments.
-     */
-    template <typename... TOps, typename = ::std::enable_if_t<std::is_same<TOps..., TileInfo>::value>>
-    TileInfo get_largest_size(const TileInfo &first, const TileInfo &second, const TOps &...ops)
-    {
-        TileInfo largest = {first.data_type(), std::max(first.width(), second.width()),
-                            std::max(first.height(), second.height())};
-        return get_largest_size(largest, ops...);
-    }
-
-    /** Helper function to define a suitable TileOperand with appropriate TileInfo
-     * such that broadcasting is taken into account, based on the arguments provided.
-     *
-     * @tparam     TArgs Must be of TileInfo type.
-     * @param[in]  args  A number of TileInfo which determine the shape of the TileOperand to declare.
-     * @return     A newly created TileOperand.
-     */
-    template <typename... TArgs, typename = ::std::enable_if_t<std::is_same<TArgs..., TileInfo>::value>>
-    TileOperand &declare_temp_tile(const TArgs &...args)
-    {
-        return TWriter::declare_tile(next_tmp_var_name().c_str(), get_largest_size(args...));
-    }
-};
-
-} // namespace ckw
-
-#endif // CKW_INCLUDE_CKW_KERNELWRITERHELPER_H
diff --git a/compute_kernel_writer/prototype/include/ckw/OperandBase.h b/compute_kernel_writer/prototype/include/ckw/OperandBase.h
deleted file mode 100644
index 9842127339..0000000000
--- a/compute_kernel_writer/prototype/include/ckw/OperandBase.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef CKW_PROTOTYPE_INCLUDE_CKW_OPERANDBASE_H
-#define CKW_PROTOTYPE_INCLUDE_CKW_OPERANDBASE_H
-
-#include "ckw/types/DataType.h"
-
-#include <string>
-
-namespace ckw
-{
-namespace prototype
-{
-class IGpuKernelWriter;
-
-class Operand;
-} // namespace prototype
-
-/** The base class for all operands. */
-class OperandBase
-{
-public:
-    /** Constructor
-     *
-     * @param[in] name The name of the operand.
-     */
-    explicit OperandBase(const ::std::string &name);
-
-    /** Destructor */
-    virtual ~OperandBase();
-
-    /** (Internal use only) Create the implementation operand.
-     *
-     * @param[in] writer The implementation kernel writer.
-     */
-    virtual prototype::Operand create_impl_operand(prototype::IGpuKernelWriter *writer) const = 0;
-
-    /** Get the name of the operand. */
-    const ::std::string &name() const;
-
-    /** Set the name of the operand. */
-    OperandBase &name(const ::std::string &name);
-
-    /** Get the data type of the operand. */
-    virtual DataType data_type() const = 0;
-
-    /** Get whether the operand is compile-time constant. */
-    virtual bool is_constant() const = 0;
-
-private:
-    ::std::string _name;
-};
-
-} // namespace ckw
-
-#endif // CKW_PROTOTYPE_INCLUDE_CKW_OPERANDBASE_H
diff --git a/compute_kernel_writer/prototype/include/ckw/ScalarValue.h b/compute_kernel_writer/prototype/include/ckw/ScalarValue.h
deleted file mode 100644
index 2a9c42acc8..0000000000
--- a/compute_kernel_writer/prototype/include/ckw/ScalarValue.h
+++ /dev/null
@@ -1,137 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef CKW_PROTOTYPE_INCLUDE_CKW_SCALARVALUE_H
-#define CKW_PROTOTYPE_INCLUDE_CKW_SCALARVALUE_H
-
-#include "ckw/Error.h"
-
-#include <cstdint>
-
-namespace ckw
-{
-
-/** The scalar value known at compile-time. */
-class ScalarValue
-{
-public:
-    /** Initialize a new instance of @ref ScalarValue class with integer value 0. */
-    ScalarValue()
-    {
-        _type      = Type::INT;
-        _value.i64 = 0;
-    }
-
-    /** Initialize a new instance of @ref ScalarValue class with the specified value. */
-    template <typename T>
-    ScalarValue(T value)
-    {
-        set(value);
-    }
-
-    /** Set the value. */
-    template <typename T>
-    void set(T value)
-    {
-        CKW_ASSERT(::std::is_integral<T>::value || ::std::is_floating_point<T>::value);
-        CKW_ASSERT(sizeof(T) <= 8);
-
-        _size = sizeof(T);
-
-        if (::std::is_integral<T>::value)
-        {
-            if (::std::is_signed<T>::value)
-            {
-                _type      = Type::INT;
-                _value.i64 = value;
-            }
-            else
-            {
-                _type      = Type::UINT;
-                _value.u64 = value;
-            }
-        }
-        else
-        {
-            _type      = Type::FLOAT;
-            _value.f64 = value;
-        }
-    }
-
-    /** Get the value.
-     *
-     * The caller must make sure that what has been stored in the object must fit
-     * the output data type without data corruption or loss of accuracy.
-     */
-    template <typename T>
-    T get() const
-    {
-        CKW_ASSERT(::std::is_integral<T>::value || ::std::is_floating_point<T>::value);
-        CKW_ASSERT(sizeof(T) >= _size);
-
-        if (::std::is_integral<T>::value)
-        {
-            if (::std::is_signed<T>::value)
-            {
-                CKW_ASSERT(_type == Type::INT || _type == Type::UINT);
-                CKW_ASSERT_IF(_type == Type::UINT, sizeof(T) > _size);
-
-                return _value.i64;
-            }
-            else
-            {
-                CKW_ASSERT(_type == Type::INT);
-
-                return _value.u64;
-            }
-        }
-        else
-        {
-            return _value.f64;
-        }
-    }
-
-private:
-    union Value
-    {
-        int64_t  i64;
-        uint64_t u64;
-        double   f64;
-    };
-
-    enum class Type : int32_t
-    {
-        UINT,
-        INT,
-        FLOAT,
-    };
-
-    Value    _value{};
-    Type     _type{};
-    uint32_t _size{};
-};
-
-} // namespace ckw
-
-#endif // CKW_PROTOTYPE_INCLUDE_CKW_SCALARVALUE_H
diff --git a/compute_kernel_writer/prototype/include/ckw/TensorInfo.h b/compute_kernel_writer/prototype/include/ckw/TensorInfo.h
deleted file mode 100644
index 24da7dc8ab..0000000000
--- a/compute_kernel_writer/prototype/include/ckw/TensorInfo.h
+++ /dev/null
@@ -1,153 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef CKW_PROTOTYPE_INCLUDE_CKW_TENSORINFO_H
-#define CKW_PROTOTYPE_INCLUDE_CKW_TENSORINFO_H
-
-#include "ckw/types/DataType.h"
-
-#include <array>
-#include <cstdint>
-
-namespace ckw
-{
-/** Compute Kernel Writer tensor data layout (or memory format) */
-enum class TensorDataLayout
-{
-    Unknown,
-    Nhwc,
-    Ndhwc
-};
-
-/** Compute Kernel Writer tensor data layout component */
-enum class TensorDataLayoutComponent
-{
-    Unknown,
-    N,
-    D,
-    H,
-    W,
-    C,
-};
-
-/** Compute Kernel Writer tensor component bitmask. The bitmask can be used to retrieve
- *  the info from @ref TensorComponent.
- */
-enum class TensorComponentBitmask : uint32_t
-{
-    OffsetFirstElement = 0x01000000, // For example, OffsetFirstElement in @ref TensorComponent
-    Stride             = 0x02000000, // For example, stride0 in @ref TensorComponent
-    Dimension          = 0x04000000, // For example, Dim0 in @ref TensorComponent
-    FoldedDimensions   = 0x08000000, // For example, Dim0xDim1 in @ref TensorComponent
-};
-
-/** Compute Kernel Writer tensor component. The tensor components are used to access specific backend-agnostic tensor arguments,
- *  such as the tensor dimensions and tensor strides.
- *  The data type is represented as an integer. The value of the integer value
- *  is assigned to retrieve the information through the @ref TensorComponentBitmask.
- */
-enum class TensorComponentType : uint32_t
-{
-    Unknown            = 0x00000000,
-    OffsetFirstElement = 0x01000000,
-    Stride0            = 0x02000001,
-    Stride1            = 0x02000010,
-    Stride2            = 0x02000100,
-    Stride3            = 0x02001000,
-    Stride4            = 0x02010000,
-    Dim0               = 0x04000001,
-    Dim1               = 0x04000010,
-    Dim2               = 0x04000100,
-    Dim3               = 0x04001000,
-    Dim4               = 0x04010000,
-    Dim1xDim2          = 0x08000110,
-    Dim2xDim3          = 0x08001100,
-    Dim1xDim2xDim3     = 0x08001110
-};
-
-/** Compute Kernel Writer tensor storage. The tensor storage represents the type of tensor memory object.
- */
-enum class TensorStorageType : uint32_t
-{
-    Unknown            = 0x00000000,
-    BufferUint8Ptr     = 0x01000000,
-    Texture2dReadOnly  = 0x02000001,
-    Texture2dWriteOnly = 0x02000010,
-};
-
-/** Compute Kernel Writer tensor shape
- *  Negative dimensions can be interpreted as dynamic dimensions by the Compute Kernel Writer
- */
-using TensorShape = std::array<int32_t, 5>;
-
-/** Compute Kernel Writer tensor info */
-class TensorInfo
-{
-public:
-    /** Constructor
-     *
-     * @param[in] dt    Tensor data type
-     * @param[in] shape Tensor shape
-     * @param[in] dl    Tensor data layout
-     * @param[in] id    Tensor id. The id is used to keep track of the bound user tensor. Through the id,
-     *                  the user can know what tensor has been used by the Compute Kernel Writer.
-     *                  Possible id values:
-     *                  - greater than or equal to 0: bind a user specific tensors
-     *                  - less than 0: bind a virtual tensor (tile)
-     */
-    TensorInfo(DataType dt, const TensorShape &shape, TensorDataLayout dl, int32_t id);
-
-    /** Set shape */
-    TensorInfo &shape(const TensorShape &shape);
-
-    /** Get shape */
-    TensorShape shape() const;
-
-    /** Set data type */
-    TensorInfo &data_type(DataType dt);
-
-    /** Get data type */
-    DataType data_type() const;
-
-    /** Set data layout */
-    TensorInfo &data_layout(TensorDataLayout dl);
-
-    /** Get data layout */
-    TensorDataLayout data_layout() const;
-
-    /** Set id */
-    TensorInfo &id(int32_t id);
-
-    /** Get layout */
-    int32_t id() const;
-
-private:
-    TensorShape      _shape{{0}};
-    DataType         _dt{DataType::Unknown};
-    TensorDataLayout _dl{TensorDataLayout::Unknown};
-    int32_t          _id{-1};
-};
-} // namespace ckw
-
-#endif /* CKW_PROTOTYPE_INCLUDE_CKW_TENSORINFO_H */
diff --git a/compute_kernel_writer/prototype/include/ckw/TensorOperand.h b/compute_kernel_writer/prototype/include/ckw/TensorOperand.h
deleted file mode 100644
index c221b449fa..0000000000
--- a/compute_kernel_writer/prototype/include/ckw/TensorOperand.h
+++ /dev/null
@@ -1,196 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef CKW_PROTOTYPE_INCLUDE_CKW_TENSOROPERAND_H
-#define CKW_PROTOTYPE_INCLUDE_CKW_TENSOROPERAND_H
-
-#include "ckw/OperandBase.h"
-#include "ckw/TensorInfo.h"
-#include "ckw/TensorTileSampler.h"
-#include "ckw/TileOperand.h"
-#include "ckw/types/DataType.h"
-
-#include <memory>
-
-namespace ckw
-{
-
-class TensorComponentOperand;
-
-// =================================================================================================
-// TensorOperand
-// =================================================================================================
-
-/** Tensor operand */
-class TensorOperand : public OperandBase
-{
-public:
-    /** Initialize a new instance of @ref TensorOperand class.
-     *
-     * @param[in] name         The name of the tensor.
-     * @param[in] info         The tensor info.
-     * @param[in] storage_type The tensor storage type.
-     */
-    TensorOperand(const ::std::string &name, const TensorInfo &info, TensorStorageType storage_type);
-
-    /** No copy constructor. */
-    TensorOperand(const TensorOperand &other) = delete;
-
-    /** No copy assignment. */
-    TensorOperand &operator=(const TensorOperand &other) = delete;
-
-    /** (Internal use only) Create the implementation operand.
-     *
-     * @param[in] writer The implementation kernel writer.
-     */
-    virtual prototype::Operand create_impl_operand(prototype::IGpuKernelWriter *writer) const override;
-
-    /** Get the tensor info. */
-    const TensorInfo &info() const;
-
-    /** Get the tensor info. */
-    TensorInfo &info();
-
-    /** Get the tensor storage type. */
-    TensorStorageType storage_type() const;
-
-    /** Get the data type. */
-    virtual DataType data_type() const override;
-
-    /** Get whether the tensor is compile-time constant. */
-    virtual bool is_constant() const override;
-
-    /** Get the default tile attached to the tensor. */
-    const TileOperand &tile() const;
-
-    /** Get the default tile attached to the tensor. */
-    TileOperand &tile();
-
-    /** Set the default tile attached to the tensor. */
-    TensorOperand &tile(TileOperand &tile);
-
-    /** Get the tensor sampler of the default tile. */
-    const TensorTileSampler &tile_sampler() const;
-
-    /** Get the tensor sampler of the default tile. */
-    TensorTileSampler &tile_sampler();
-
-    /** Set the tensor sampler of the default tile. */
-    TensorOperand &tile_sampler(const TensorTileSampler &value);
-
-    /** Get the operand that contains the stride in y dimension of the tensor. */
-    TensorComponentOperand &stride1();
-
-    /** Get the operand that contains the stride in z dimension of the tensor. */
-    TensorComponentOperand &stride2();
-
-    /** Get the operand that contains the stride in w dimension of the tensor. */
-    TensorComponentOperand &stride3();
-
-    /** Get the operand that contains the stride in w dimension of the tensor. */
-    TensorComponentOperand &stride4();
-
-    /** Get the operand that contains the size of dimension 0 of the tensor. */
-    TensorComponentOperand &dim0();
-
-    /** Get the operand that contains the size of dimension 1 of the tensor. */
-    TensorComponentOperand &dim1();
-
-    /** Get the operand that contains the size of dimension 2 of the tensor. */
-    TensorComponentOperand &dim2();
-
-    /** Get the operand that contains the size of dimension 3 of the tensor. */
-    TensorComponentOperand &dim3();
-
-    /** Get the operand that contains the size of dimension 4 of the tensor. */
-    TensorComponentOperand &dim4();
-
-    /** Get the operand that contains the size of dimensions 1 and 2 collapsed. */
-    TensorComponentOperand &dim1_dim2();
-
-    /** Get the operand that contains the size of dimensions 1, 2 and 3 collapsed. */
-    TensorComponentOperand &dim1_dim2_dim3();
-
-    /** Get the operand that contains the offset in bytes to the first element. */
-    TensorComponentOperand &offset_first_element_in_bytes();
-
-private:
-    TensorInfo        _info;
-    TensorStorageType _storage_type;
-
-    TileOperand      *_tile{nullptr};
-    TensorTileSampler _tile_sampler{};
-
-    ::std::unique_ptr<TensorComponentOperand> _stride1{nullptr};
-    ::std::unique_ptr<TensorComponentOperand> _stride2{nullptr};
-    ::std::unique_ptr<TensorComponentOperand> _stride3{nullptr};
-    ::std::unique_ptr<TensorComponentOperand> _stride4{nullptr};
-    ::std::unique_ptr<TensorComponentOperand> _dim0{nullptr};
-    ::std::unique_ptr<TensorComponentOperand> _dim1{nullptr};
-    ::std::unique_ptr<TensorComponentOperand> _dim2{nullptr};
-    ::std::unique_ptr<TensorComponentOperand> _dim3{nullptr};
-    ::std::unique_ptr<TensorComponentOperand> _dim4{nullptr};
-    ::std::unique_ptr<TensorComponentOperand> _dim1_dim2{nullptr};
-    ::std::unique_ptr<TensorComponentOperand> _dim1_dim2_dim3{nullptr};
-    ::std::unique_ptr<TensorComponentOperand> _offset_first_element_in_bytes{nullptr};
-};
-
-// =================================================================================================
-// TensorComponentOperand
-// =================================================================================================
-
-/** Tile operand that contains tensor information. */
-class TensorComponentOperand : public TileOperand
-{
-public:
-    /** Initialize a new instance of @ref TensorComponentOperand class.
-     *
-     * @param[in] tensor    The tensor operand.
-     * @param[in] component The tensor info component.
-     */
-    TensorComponentOperand(TensorOperand &tensor, TensorComponentType component);
-
-    /** Get the tensor operand. */
-    TensorOperand &tensor();
-
-    /** Get the tensor operand. */
-    const TensorOperand &tensor() const;
-
-    /** Get the tensor component. */
-    TensorComponentType component_type() const;
-
-    /** (Internal use only) Create the implementation operand.
-     *
-     * @param[in] writer The implementation kernel writer.
-     */
-    virtual prototype::Operand create_impl_operand(prototype::IGpuKernelWriter *writer) const override;
-
-private:
-    TensorOperand      &_tensor;
-    TensorComponentType _component;
-};
-
-} // namespace ckw
-
-#endif // CKW_PROTOTYPE_INCLUDE_CKW_TENSOROPERAND_H
diff --git a/compute_kernel_writer/prototype/include/ckw/TensorTileSampler.h b/compute_kernel_writer/prototype/include/ckw/TensorTileSampler.h
deleted file mode 100644
index 606dec3535..0000000000
--- a/compute_kernel_writer/prototype/include/ckw/TensorTileSampler.h
+++ /dev/null
@@ -1,169 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef CKW_PROTOTYPE_INCLUDE_CKW_TENSORTILESAMPLER_H
-#define CKW_PROTOTYPE_INCLUDE_CKW_TENSORTILESAMPLER_H
-
-#include "ckw/types/TensorSamplerTypes.h"
-
-#include <functional>
-
-namespace ckw
-{
-
-class TileOperand;
-
-/** Tensor sampler
- *
- * It contains information about how the result tile should be stored to tensor memory.
- * It can also be used to dictate how the subsequent operators fetch the input tensor.
- */
-class TensorTileSampler
-{
-public:
-    /** Initialize a new instance of @ref TensorSampler class. */
-    TensorTileSampler();
-
-    /** Initialize a new instance of @ref TensorSampler class.
-     *
-     * @param[in] x              The coordinate in the x dimension.
-     * @param[in] y              The coordinate in the y dimension.
-     * @param[in] z              The coordinate in the z dimension.
-     * @param[in] b              The coordinate in the batch dimension.
-     * @param[in] format         The tensor data format.
-     * @param[in] address_mode_x The address mode of the x dimension.
-     * @param[in] address_mode_y The address mode of the y dimension.
-     * @param[in] address_mode_z The address mode of the z dimension.
-     */
-    TensorTileSampler(TileOperand              &x,
-                      TileOperand              &y,
-                      TileOperand              &z,
-                      TileOperand              &b,
-                      TensorSamplerFormat       format,
-                      TensorSamplerAddressModeX address_mode_x,
-                      TensorSamplerAddressModeY address_mode_y,
-                      TensorSamplerAddressModeZ address_mode_z);
-
-    /** Initialize a new instance of @ref TensorSampler class.
-     *
-     * @param[in] x              The coordinate in the x dimension.
-     * @param[in] y              The coordinate in the y dimension.
-     * @param[in] z              The coordinate in the z dimension.
-     * @param[in] b              The coordinate in the batch dimension.
-     * @param[in] height         The height of the tile.
-     * @param[in] width          The width of the tile.
-     * @param[in] format         The tensor data format.
-     * @param[in] address_mode_x The address mode of the x dimension.
-     * @param[in] address_mode_y The address mode of the y dimension.
-     * @param[in] address_mode_z The address mode of the z dimension.
-     */
-    TensorTileSampler(TileOperand              &x,
-                      TileOperand              &y,
-                      TileOperand              &z,
-                      TileOperand              &b,
-                      int32_t                   height,
-                      int32_t                   width,
-                      TensorSamplerFormat       format,
-                      TensorSamplerAddressModeX address_mode_x,
-                      TensorSamplerAddressModeY address_mode_y,
-                      TensorSamplerAddressModeZ address_mode_z);
-
-    /** Get the coordinate in the x dimension. */
-    const TileOperand &x() const;
-
-    /** Set the coordinate in the x dimension. */
-    TensorTileSampler &x(TileOperand &x);
-
-    /** Get the coordinate in the y dimension. */
-    const TileOperand &y() const;
-
-    /** Set the coordinate in the y dimension. */
-    TensorTileSampler &y(TileOperand &y);
-
-    /** Get the coordinate in the z dimension. */
-    const TileOperand &z() const;
-
-    /** Set the coordinate in the z dimension. */
-    TensorTileSampler &z(TileOperand &z);
-
-    /** Get the coordinate in the batch dimension. */
-    const TileOperand &b() const;
-
-    /** Set the coordinate in the batch dimension. */
-    TensorTileSampler &b(TileOperand &b);
-
-    /** Get the width of the tile. */
-    int32_t width() const;
-
-    /** Set the width of the tile. */
-    TensorTileSampler &width(int32_t width);
-
-    /** Get the height of the tile. */
-    int32_t height() const;
-
-    /** Set the height of the tile. */
-    TensorTileSampler &height(int32_t height);
-
-    /** Get the format of the tensor. */
-    TensorSamplerFormat format() const;
-
-    /** Set the format of the tensor. */
-    TensorTileSampler &format(TensorSamplerFormat format);
-
-    /** Get the address mode of the x dimension. */
-    TensorSamplerAddressModeX address_mode_x() const;
-
-    /** Set the address mode of the x-dimension. */
-    TensorTileSampler &address_mode_x(TensorSamplerAddressModeX address_mode_x);
-
-    /** Get the address mode of the y dimension. */
-    TensorSamplerAddressModeY address_mode_y() const;
-
-    /** Set the address mode of the y dimension. */
-    TensorTileSampler &address_mode_y(TensorSamplerAddressModeY address_mode_y);
-
-    /** Get the address mode of the z dimension. */
-    TensorSamplerAddressModeZ address_mode_z() const;
-
-    /** Set the address mode of the z dimension. */
-    TensorTileSampler &address_mode_z(TensorSamplerAddressModeZ address_mode_z);
-
-private:
-    TileOperand *_x{nullptr};
-    TileOperand *_y{nullptr};
-    TileOperand *_z{nullptr};
-    TileOperand *_b{nullptr};
-
-    int32_t _height{0};
-    int32_t _width{0};
-
-    TensorSamplerFormat       _format{TensorSamplerFormat::Unknown};
-    TensorSamplerAddressModeX _address_mode_x{TensorSamplerAddressModeX::Unknown};
-    TensorSamplerAddressModeY _address_mode_y{TensorSamplerAddressModeY::Unknown};
-    TensorSamplerAddressModeZ _address_mode_z{TensorSamplerAddressModeZ::Unknown};
-};
-
-} // namespace ckw
-
-#endif // CKW_PROTOTYPE_INCLUDE_CKW_TENSORTILESAMPLER_H
diff --git a/compute_kernel_writer/prototype/include/ckw/TileInfo.h b/compute_kernel_writer/prototype/include/ckw/TileInfo.h
deleted file mode 100644
index e0d064169e..0000000000
--- a/compute_kernel_writer/prototype/include/ckw/TileInfo.h
+++ /dev/null
@@ -1,92 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef CKW_PROTOTYPE_INCLUDE_CKW_TILEINFO_H
-#define CKW_PROTOTYPE_INCLUDE_CKW_TILEINFO_H
-
-#include "ckw/types/DataType.h"
-
-#include <array>
-#include <cstdint>
-
-namespace ckw
-{
-// Constants to access the tile width and height in the TileShape
-constexpr int32_t kTileWidthIdx  = 0;
-constexpr int32_t kTileHeightIdx = 1;
-
-/** Compute Kernel Writer tile shape. It is used to define the shape of the tile */
-using TileShape = std::array<int32_t, 2>;
-
-/** Compute Kernel Writer tile info */
-class TileInfo
-{
-public:
-    /** Constructor used to initialize a scalar variable with a given data type
-     *
-     * @param[in] dt Tile data type
-     */
-    TileInfo(DataType dt);
-
-    /** Constructor used to initialize a vector with a given data type and vector length.
-     *
-     * @param[in] dt Tile data type
-     * @param[in] w  Tile width (or vector length)
-     */
-    TileInfo(DataType dt, int32_t w);
-
-    /** Constructor used to initialize a tile with a given data type and tile sizes.
-     *
-     * @param[in] dt Tile data type
-     * @param[in] h  Tile height
-     * @param[in] w  Tile width
-     */
-    TileInfo(DataType dt, int32_t h, int32_t w);
-
-    /** Set width */
-    TileInfo &width(int32_t w);
-
-    /** Get width */
-    int32_t width() const;
-
-    /** Set height */
-    TileInfo &height(int32_t h);
-
-    /** Get height */
-    int32_t height() const;
-
-    /** Set data type */
-    TileInfo &data_type(DataType dt);
-
-    /** Get data type */
-    DataType data_type() const;
-
-private:
-    DataType  _dt{DataType::Unknown};
-    TileShape _shape{};
-};
-
-} // namespace ckw
-
-#endif /* COMPUTE_KERNEL_WRITER_INCLUDE_CKW_TILEINFO_H */
diff --git a/compute_kernel_writer/prototype/include/ckw/TileOperand.h b/compute_kernel_writer/prototype/include/ckw/TileOperand.h
deleted file mode 100644
index 24ee373a24..0000000000
--- a/compute_kernel_writer/prototype/include/ckw/TileOperand.h
+++ /dev/null
@@ -1,127 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef CKW_PROTOTYPE_INCLUDE_CKW_TILEOPERAND_H
-#define CKW_PROTOTYPE_INCLUDE_CKW_TILEOPERAND_H
-
-#include "ckw/Error.h"
-#include "ckw/OperandBase.h"
-#include "ckw/ScalarValue.h"
-#include "ckw/TileInfo.h"
-
-#include <vector>
-
-namespace ckw
-{
-
-class Kernel;
-
-using TileContainer = std::vector<std::vector<std::string>>;
-
-/** Tile operand which can be either scalar, vector or 2D tile. */
-class TileOperand : public OperandBase
-{
-public:
-    /** Initialize a new instance of @ref TileOperand class with the tile information.
-     *
-     * @param[in] name      The name of the tile.
-     * @param[in] tile_info The tile info.
-     */
-    TileOperand(const ::std::string &name, const TileInfo &tile_info);
-
-    /** Initialize a new instance of @ref TileOperand for scalar variable.
-     *
-     * @param[in] name      The name of the tile.
-     * @param[in] data_type The data type of the tile.
-     */
-    TileOperand(const ::std::string &name, DataType data_type);
-
-    /** Initialize a new instance of @ref TileOperand for compile-time constant scalar variable.
-     *
-     * @param[in] name  The name of the tile.
-     * @param[in] value The value of the tile.
-     */
-    TileOperand(const ::std::string &name, int32_t value);
-
-    /** Initialize a new instance of @ref TileOperand for compile-time constant scalar variable.
-     *
-     * @param[in] name  The name of the tile.
-     * @param[in] value The value of the tile.
-     */
-    TileOperand(const ::std::string &name, float value);
-
-    /** Initialize a new instance of @ref TileOperand for compile-time constant variable.
-     *
-     * @param[in] name  The name of the tile.
-     * @param[in] value The value of the tile.
-     */
-    TileOperand(const ::std::string &name, const ::std::vector<std::vector<std::string>> &value, DataType dt);
-
-    /** Prohibit copy of tile operand. */
-    TileOperand(const TileOperand &) = delete;
-
-    /** Prohibit copy of tile operand. */
-    TileOperand &operator=(const TileOperand &) = delete;
-
-    /** (Internal use only) Create the implementation operand.
-     *
-     * @param[in] writer The implementation kernel writer.
-     */
-    virtual prototype::Operand create_impl_operand(prototype::IGpuKernelWriter *writer) const override;
-
-    /** Get the tile info. */
-    const TileInfo &tile_info() const;
-
-    /** Get the data type of the tile. */
-    virtual DataType data_type() const override;
-
-    /** Get whether the tile is compile-time constant. */
-    virtual bool is_constant() const override;
-
-    /** Get whether the tile is a scalar value. */
-    bool is_scalar() const;
-
-    /** Get the scalar value of the tile.
-     *
-     * The tile must have the shape of 1, 1 (i.e. scalar).
-     *
-     * @return Scalar value as a string.
-     */
-    std::string scalar_value() const;
-
-    /** Get the values of the tile.
-     *
-     * @return 2D container of values.
-     */
-    const TileContainer &value() const;
-
-private:
-    TileInfo      _info;
-    TileContainer _value{};
-    bool          _constant;
-};
-
-} // namespace ckw
-
-#endif // CKW_PROTOTYPE_INCLUDE_CKW_TILEOPERAND_H
diff --git a/compute_kernel_writer/prototype/include/ckw/types/ConvertPolicy.h b/compute_kernel_writer/prototype/include/ckw/types/ConvertPolicy.h
deleted file mode 100644
index 2a198507eb..0000000000
--- a/compute_kernel_writer/prototype/include/ckw/types/ConvertPolicy.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef CKW_INCLUDE_CKW_CONVERTPOLICY_H
-#define CKW_INCLUDE_CKW_CONVERTPOLICY_H
-
-#include <cstdint>
-
-namespace ckw
-{
-
-enum class ConvertPolicy : int32_t
-{
-    None     = 0, // No policy specified.
-    Saturate = 1, // Saturated.
-};
-
-} // namespace ckw
-
-#endif //CKW_INCLUDE_CKW_CONVERTPOLICY_H
diff --git a/compute_kernel_writer/prototype/include/ckw/types/DataType.h b/compute_kernel_writer/prototype/include/ckw/types/DataType.h
deleted file mode 100644
index 3447dd61d6..0000000000
--- a/compute_kernel_writer/prototype/include/ckw/types/DataType.h
+++ /dev/null
@@ -1,50 +0,0 @@
-/*
-* Copyright (c) 2023 Arm Limited.
-*
-* SPDX-License-Identifier: MIT
-*
-* Permission is hereby granted, free of charge, to any person obtaining a copy
-* of this software and associated documentation files (the "Software"), to
-* deal in the Software without restriction, including without limitation the
-* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-* sell copies of the Software, and to permit persons to whom the Software is
-* furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in all
-* copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*/
-
-#ifndef CKW_INCLUDE_CKW_DATATYPE_H
-#define CKW_INCLUDE_CKW_DATATYPE_H
-
-#include <cstdint>
-
-namespace ckw
-{
-
-/** Compute Kernel Writer data types. This data type is used by the code variables and tensor arguments. */
-enum class DataType : int32_t
-{
-    Unknown = 0x00,
-    Fp32    = 0x11,
-    Fp16    = 0x12,
-    Int32   = 0x21,
-    Int16   = 0x22,
-    Int8    = 0x24,
-    Uint32  = 0x31,
-    Uint16  = 0x32,
-    Uint8   = 0x34,
-    Bool    = 0x41
-};
-
-} // namespace ckw
-
-#endif //CKW_INCLUDE_CKW_DATATYPE_H
diff --git a/compute_kernel_writer/prototype/include/ckw/types/Functions.h b/compute_kernel_writer/prototype/include/ckw/types/Functions.h
deleted file mode 100644
index c6afaa0ac8..0000000000
--- a/compute_kernel_writer/prototype/include/ckw/types/Functions.h
+++ /dev/null
@@ -1,62 +0,0 @@
-/*
-* Copyright (c) 2023 Arm Limited.
-*
-* SPDX-License-Identifier: MIT
-*
-* Permission is hereby granted, free of charge, to any person obtaining a copy
-* of this software and associated documentation files (the "Software"), to
-* deal in the Software without restriction, including without limitation the
-* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-* sell copies of the Software, and to permit persons to whom the Software is
-* furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in all
-* copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*/
-
-#ifndef CKW_INCLUDE_CKW_FUNCTIONS_H
-#define CKW_INCLUDE_CKW_FUNCTIONS_H
-
-#include <cstdint>
-
-namespace ckw
-{
-
-enum class UnaryFunction : int32_t
-{
-    Exp   = 0x0000,
-    Tanh  = 0x0001,
-    Sqrt  = 0x0002,
-    Erf   = 0x0003,
-    Fabs  = 0x0004,
-    Log   = 0x0006,
-    Round = 0x0007,
-    Floor = 0x0008,
-
-    // Misc
-    SizeOf = 0x0009,
-};
-
-enum class BinaryFunction : int32_t
-{
-    Min = 0x0000,
-    Max = 0x0001,
-};
-
-enum class TernaryFunction : int32_t
-{
-    Select = 0x0000,
-    Clamp  = 0x0001,
-};
-
-} // namespace ckw
-
-#endif //CKW_INCLUDE_CKW_FUNCTIONS_H
diff --git a/compute_kernel_writer/prototype/include/ckw/types/GpuTargetLanguage.h b/compute_kernel_writer/prototype/include/ckw/types/GpuTargetLanguage.h
deleted file mode 100644
index 6c08617949..0000000000
--- a/compute_kernel_writer/prototype/include/ckw/types/GpuTargetLanguage.h
+++ /dev/null
@@ -1,41 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef CKW_INCLUDE_CKW_GPUTARGETLANGUAGE_H
-#define CKW_INCLUDE_CKW_GPUTARGETLANGUAGE_H
-
-#include <cstdint>
-
-namespace ckw
-{
-
-enum class GpuTargetLanguage : int32_t
-{
-    Unknown,
-    OpenCL
-};
-
-} // namespace ckw
-
-#endif //CKW_INCLUDE_CKW_GPUTARGETLANGUAGE_H
diff --git a/compute_kernel_writer/prototype/include/ckw/types/Operators.h b/compute_kernel_writer/prototype/include/ckw/types/Operators.h
deleted file mode 100644
index b560996837..0000000000
--- a/compute_kernel_writer/prototype/include/ckw/types/Operators.h
+++ /dev/null
@@ -1,78 +0,0 @@
-/*
-* Copyright (c) 2023 Arm Limited.
-*
-* SPDX-License-Identifier: MIT
-*
-* Permission is hereby granted, free of charge, to any person obtaining a copy
-* of this software and associated documentation files (the "Software"), to
-* deal in the Software without restriction, including without limitation the
-* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
-* sell copies of the Software, and to permit persons to whom the Software is
-* furnished to do so, subject to the following conditions:
-*
-* The above copyright notice and this permission notice shall be included in all
-* copies or substantial portions of the Software.
-*
-* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
-* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
-* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
-* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
-* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
-* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
-* SOFTWARE.
-*/
-
-#ifndef CKW_INCLUDE_CKW_OPERATORS_H
-#define CKW_INCLUDE_CKW_OPERATORS_H
-
-#include <cstdint>
-
-namespace ckw
-{
-
-enum class UnaryOp : int32_t
-{
-    LogicalNot = 0x0000, // !
-    BitwiseNot = 0x0001, // ~
-    Negate     = 0x0002, // -
-};
-
-/* Binary operations
-*/
-enum class BinaryOp : int32_t
-{
-    // Elementwise
-    Add = 0x0000, // +
-    Sub = 0x0001, // -
-    Mul = 0x0002, // *
-    Div = 0x0003, // /
-    Mod = 0x0004, // %
-    // Relational
-    Equal        = 0x1000, // ==
-    Less         = 0x1001, // <
-    LessEqual    = 0x1002, // <=
-    Greater      = 0x1003, // >
-    GreaterEqual = 0x1004, // >=
-    // Algebra
-    MatMul_Nt_Nt = 0x2000, // X
-    MatMul_Nt_T  = 0x2001, // X
-    MatMul_T_Nt  = 0x2002, // X
-    MatMul_T_T   = 0x2003, // X
-    Dot          = 0x2004, // .
-    // Logical
-    LogicalAnd = 0x3000, // &&
-    LogicalOr  = 0x3001, // ||
-    // Bitwise
-    BitwiseXOR = 0x4000, // ^
-};
-
-enum class AssignmentOp : int32_t
-{
-    // Unary
-    Increment = 0x0000, // +=
-    Decrement = 0x0001, // -=
-};
-
-} // namespace ckw
-
-#endif //CKW_INCLUDE_CKW_OPERATORS_H
diff --git a/compute_kernel_writer/prototype/include/ckw/types/TensorSamplerTypes.h b/compute_kernel_writer/prototype/include/ckw/types/TensorSamplerTypes.h
deleted file mode 100644
index 63405a0764..0000000000
--- a/compute_kernel_writer/prototype/include/ckw/types/TensorSamplerTypes.h
+++ /dev/null
@@ -1,82 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef CKW_INCLUDE_CKW_TENSORSAMPLERTYPES_H
-#define CKW_INCLUDE_CKW_TENSORSAMPLERTYPES_H
-
-#include <cstdint>
-
-namespace ckw
-{
-
-enum class TensorSamplerFormat : int32_t
-{
-    Unknown = 0,
-    C_WH_1  = 1,
-    C_W_H   = 2
-};
-
-enum class TensorSamplerAddressModeX : int32_t
-{
-    Unknown = 0,
-    None    = 1, // The user guarantees that the X coordinate is always in-bound
-    OverlappingMin =
-        2 // (FIXED shapes only) Reduce the load/store length when x == 0 (MIN). The load length will be width % original length
-    // Leftover elements can be handled using overlapping. This involves processing some of the elements in the array twice.
-};
-
-enum class TensorSamplerAddressModeY : int32_t
-{
-    Unknown = 0,
-    None    = 1, // The user guarantees that the Y coordinate is always in-bound
-    OverlappingMin =
-        2, // (FIXED shapes only) Reduce the load/store length when x == 0 (MIN). The load length will be width % original length
-    Skip = 3, // Skip the read/write
-    SkipMinEdgeOnly =
-        4, // Skip greater than or equal to max only. The user guarantees that the Y coordinate is always >= 0
-    SkipMaxEdgeOnly    = 5, // Skip less than 0 only
-    ClampToNearest     = 6, // Clamp the coordinate to nearest edge (0 or max value allowed on Y)
-    ClampToMinEdgeOnly = 7, // Clamp the negative coordinate to 0 only. Therefore, we expect Y to be always < MAX
-    ClampToMaxEdgeOnly = 8, // Clamp the coordinate to the max value allowed on Y only. We expect Y to be always >= 0
-    ClampToBorder      = 9, // Clamp to border which always has 0 value
-    ClampToBorderMinEdgeOnly = 10,
-    ClampToBorderMaxEdgeOnly = 11
-};
-
-enum class TensorSamplerAddressModeZ : int32_t
-{
-    Unknown = 0,
-    None    = 1, // The user guarantees that the Y coordinate is always in-bound
-    Skip    = 3, // Skip the read/write
-    SkipMinEdgeOnly =
-        4, // Skip greater than or equal to max only. The user guarantees that the Y coordinate is always >= 0
-    SkipMaxEdgeOnly    = 5, // Skip less than 0 only
-    ClampToNearest     = 6, // Clamp the coordinate to nearest edge (0 or max value allowed on Y)
-    ClampToMinEdgeOnly = 7, // Clamp the negative coordinate to 0 only. Therefore, we expect Y to be always < MAX
-    ClampToMaxEdgeOnly = 8, // Clamp the coordinate to the max value allowed on Y only. We expect Y to be always >= 0
-};
-
-} // namespace ckw
-
-#endif //CKW_INCLUDE_CKW_TENSORSAMPLERTYPES_H
diff --git a/compute_kernel_writer/prototype/src/Kernel.cpp b/compute_kernel_writer/prototype/src/Kernel.cpp
deleted file mode 100644
index 6228ed17d0..0000000000
--- a/compute_kernel_writer/prototype/src/Kernel.cpp
+++ /dev/null
@@ -1,163 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ckw/Kernel.h"
-
-#include "ckw/TensorOperand.h"
-#include "ckw/types/GpuTargetLanguage.h"
-
-#include "src/Prototype.h"
-
-namespace ckw
-{
-
-Kernel::Kernel(GpuTargetLanguage language) : Kernel{"unnamed", language}
-{
-}
-
-Kernel::Kernel(const char *name, GpuTargetLanguage language)
-    : _name(name),
-      _kernel(std::make_unique<prototype::GpuKernelWriterDataHolder>(language)),
-      _operands{},
-      _tensor_id_operands{}
-{
-}
-
-Kernel::~Kernel()
-{
-}
-
-const std::string &Kernel::name() const
-{
-    return _name;
-}
-
-void Kernel::name(const std::string &name)
-{
-    _name = name;
-}
-std::vector<KernelArgument> Kernel::arguments() const
-{
-    std::vector<KernelArgument> arguments;
-
-    const auto impl_args = _kernel->arguments.tensor_argument_declarations();
-
-    for (auto tensor_arg : impl_args)
-    {
-        auto tensor = _tensor_id_operands.at(tensor_arg->format().id);
-        arguments.push_back(*tensor);
-
-        for (auto component_arg : tensor_arg->component_declarations())
-        {
-            switch (component_arg)
-            {
-                case TensorComponentType::OffsetFirstElement:
-                    arguments.push_back(tensor->offset_first_element_in_bytes());
-                    break;
-
-                case TensorComponentType::Stride1:
-                    arguments.push_back(tensor->stride1());
-                    break;
-
-                case TensorComponentType::Stride2:
-                    arguments.push_back(tensor->stride2());
-                    break;
-
-                case TensorComponentType::Stride3:
-                    arguments.push_back(tensor->stride3());
-                    break;
-
-                case TensorComponentType::Stride4:
-                    arguments.push_back(tensor->stride4());
-                    break;
-
-                case TensorComponentType::Dim0:
-                    arguments.push_back(tensor->dim0());
-                    break;
-
-                case TensorComponentType::Dim1:
-                    arguments.push_back(tensor->dim1());
-                    break;
-
-                case TensorComponentType::Dim2:
-                    arguments.push_back(tensor->dim2());
-                    break;
-
-                case TensorComponentType::Dim3:
-                    arguments.push_back(tensor->dim3());
-                    break;
-
-                case TensorComponentType::Dim4:
-                    arguments.push_back(tensor->dim4());
-                    break;
-
-                case TensorComponentType::Dim1xDim2:
-                    arguments.push_back(tensor->dim1_dim2());
-                    break;
-
-                case TensorComponentType::Dim1xDim2xDim3:
-                    arguments.push_back(tensor->dim1_dim2_dim3());
-                    break;
-
-                default:
-                    CKW_ASSERT(false);
-            }
-        }
-    }
-
-    return arguments;
-}
-
-TileOperand &Kernel::register_operand(std::unique_ptr<TileOperand> operand)
-{
-    const auto &name = operand->name();
-    auto        ptr  = operand.get();
-
-    CKW_ASSERT(_operands.find(name) == _operands.end());
-    _operands[name] = std::move(operand);
-
-    return *ptr;
-}
-
-TensorOperand &Kernel::register_operand(std::unique_ptr<TensorOperand> operand)
-{
-    const auto  id   = operand->info().id();
-    const auto &name = operand->name();
-    auto        ptr  = operand.get();
-
-    CKW_ASSERT(_tensor_id_operands.find(id) == _tensor_id_operands.end());
-    CKW_ASSERT(_operands.find(name) == _operands.end());
-
-    _tensor_id_operands[id] = operand.get();
-    _operands[name]         = std::move(operand);
-
-    return *ptr;
-}
-
-prototype::GpuKernelWriterDataHolder *Kernel::impl()
-{
-    return _kernel.get();
-}
-
-} // namespace ckw
diff --git a/compute_kernel_writer/prototype/src/KernelArgument.cpp b/compute_kernel_writer/prototype/src/KernelArgument.cpp
deleted file mode 100644
index 24ace28eb3..0000000000
--- a/compute_kernel_writer/prototype/src/KernelArgument.cpp
+++ /dev/null
@@ -1,66 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ckw/KernelArgument.h"
-
-#include "ckw/Error.h"
-#include "ckw/TensorOperand.h"
-
-namespace ckw
-{
-
-KernelArgument::KernelArgument(TensorOperand &tensor) : _type(Type::TensorStorage), _id(tensor.info().id())
-{
-    _sub_id.tensor_storage_type = tensor.storage_type();
-}
-
-KernelArgument::KernelArgument(TensorComponentOperand &tensor_component)
-    : _type(Type::TensorComponent), _id(tensor_component.tensor().info().id())
-{
-    _sub_id.tensor_component_type = tensor_component.component_type();
-}
-
-KernelArgument::Type KernelArgument::type() const
-{
-    return _type;
-}
-
-int32_t KernelArgument::id() const
-{
-    return _id;
-}
-
-TensorStorageType KernelArgument::tensor_storage_type() const
-{
-    CKW_ASSERT(_type == Type::TensorStorage);
-    return _sub_id.tensor_storage_type;
-}
-
-TensorComponentType KernelArgument::tensor_component_type() const
-{
-    CKW_ASSERT(_type == Type::TensorComponent);
-    return _sub_id.tensor_component_type;
-}
-
-} // namespace ckw
diff --git a/compute_kernel_writer/prototype/src/KernelWriter.cpp b/compute_kernel_writer/prototype/src/KernelWriter.cpp
deleted file mode 100644
index 9f58d9fefa..0000000000
--- a/compute_kernel_writer/prototype/src/KernelWriter.cpp
+++ /dev/null
@@ -1,371 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ckw/KernelWriter.h"
-
-#include "ckw/Error.h"
-#include "ckw/TensorInfo.h"
-#include "ckw/TensorOperand.h"
-
-#include "src/Prototype.h"
-
-#include <sstream>
-
-namespace ckw
-{
-
-namespace
-{
-
-inline prototype::TensorInfo create_impl_tensor_info(const TensorInfo &info)
-{
-    return prototype::TensorInfo{info.shape(), info.data_type(), info.data_layout(), info.id()};
-}
-
-} // namespace
-
-// =================================================================================================
-// Constructors and destructor
-// =================================================================================================
-
-KernelWriter::KernelWriter(Kernel &kernel)
-    : _kernel(&kernel),
-      _impl_attr(std::make_unique<prototype::GpuKernelWriterAttribute>()),
-      _impl(prototype::GpuKernelWriterFactory::create(_impl_attr.get(), kernel.impl()))
-{
-    _impl->set_IdSpace(1);
-}
-
-KernelWriter::~KernelWriter()
-{
-}
-
-// =================================================================================================
-// Scope management
-// =================================================================================================
-
-int32_t KernelWriter::id_space() const
-{
-    return _id_space;
-}
-
-KernelWriter &KernelWriter::id_space(int32_t id_space)
-{
-    CKW_ASSERT(id_space <= _max_id_space);
-
-    _id_space = id_space;
-    return *this;
-}
-
-int32_t KernelWriter::next_id_space()
-{
-    id_space(++_max_id_space);
-    return _id_space;
-}
-
-// =================================================================================================
-// Tensor and tile declaration
-// =================================================================================================
-
-TensorOperand &
-KernelWriter::declare_tensor_argument(const std::string &name, const TensorInfo &info, TensorStorageType storage_type)
-{
-    const auto var_name = generate_variable_name(name);
-
-    _impl->declare_argument(var_name, create_impl_tensor_info(info));
-
-    auto &operand = _kernel->register_operand(std::make_unique<TensorOperand>(var_name, info, storage_type));
-
-    return operand;
-}
-
-TileOperand &KernelWriter::declare_tile_argument(const std::string &name, int32_t value)
-{
-    const auto var_name = generate_variable_name(name);
-
-    auto &operand = _kernel->register_operand(std::make_unique<TileOperand>(var_name, value));
-
-    return operand;
-}
-
-std::string KernelWriter::generate_variable_name(const std::string &name) const
-{
-    std::stringstream var_name;
-
-    var_name << "_" << _id_space << "_" << name;
-
-    return var_name.str();
-}
-
-TileOperand &KernelWriter::declare_tile_operand(std::unique_ptr<TileOperand> operand_ptr)
-{
-    auto       &operand = _kernel->register_operand(std::move(operand_ptr));
-    const auto &name    = operand.name();
-
-    if (!operand.is_constant())
-    {
-        const auto &info = operand.tile_info();
-
-        _impl->declare_tile(name, prototype::TileInfo(info.data_type(), info.width(), info.height()));
-    }
-    else
-    {
-        _impl->declare_const_tile(name, operand.value(), operand.data_type());
-    }
-
-    return operand;
-}
-
-// =================================================================================================
-// Load and store
-// =================================================================================================
-
-void KernelWriter::op_load(TileOperand             &tile,
-                           const TensorOperand     &tensor,
-                           const TensorTileSampler &sampler,
-                           const TileOperand       &dilation_y)
-{
-    prototype::TensorOperand impl_tensor(
-        tensor.name(),
-        prototype::GpuSampler{sampler.format(), prototype::to_gpu_tensor_storage(tensor.storage_type()),
-                              sampler.address_mode_x(), sampler.address_mode_y(), sampler.address_mode_z()});
-
-    auto impl_x = sampler.x().create_impl_operand(_impl.get());
-    auto impl_y = sampler.y().create_impl_operand(_impl.get());
-    auto impl_z = sampler.z().create_impl_operand(_impl.get());
-    auto impl_b = sampler.b().create_impl_operand(_impl.get());
-
-    auto impl_dilation_y = dilation_y.create_impl_operand(_impl.get());
-
-    auto impl_dst = tile.create_impl_operand(_impl.get());
-
-    _impl->op_load_immediate(impl_tensor, impl_dst, impl_x, impl_y, impl_z, impl_b, impl_dilation_y);
-}
-
-void KernelWriter::op_load_indirect(TileOperand &tile, const TensorOperand &tensor, const TensorTileSampler &sampler)
-{
-    prototype::TensorOperand impl_tensor(
-        tensor.name(),
-        prototype::GpuSampler{sampler.format(), prototype::to_gpu_tensor_storage(tensor.storage_type()),
-                              sampler.address_mode_x(), sampler.address_mode_y(), sampler.address_mode_z()});
-
-    auto impl_x = sampler.x().create_impl_operand(_impl.get());
-    auto impl_y = sampler.y().create_impl_operand(_impl.get());
-    auto impl_z = sampler.z().create_impl_operand(_impl.get());
-    auto impl_b = sampler.b().create_impl_operand(_impl.get());
-
-    auto impl_dst = tile.create_impl_operand(_impl.get());
-
-    _impl->op_load_indirect(impl_tensor, impl_dst, impl_x, impl_y, impl_z, impl_b);
-}
-
-void KernelWriter::util_get_indirect_buffer(TileOperand             &tile,
-                                            const TensorOperand     &tensor,
-                                            const TensorTileSampler &sampler,
-                                            const TileOperand       &x,
-                                            const TileOperand       &y,
-                                            const TileOperand       &x_off,
-                                            const TileOperand       &y_off)
-{
-    prototype::TensorOperand impl_tensor(
-        tensor.name(),
-        prototype::GpuSampler{sampler.format(), prototype::to_gpu_tensor_storage(tensor.storage_type()),
-                              sampler.address_mode_x(), sampler.address_mode_y(), sampler.address_mode_z()});
-
-    auto impl_x     = x.create_impl_operand(_impl.get());
-    auto impl_y     = y.create_impl_operand(_impl.get());
-    auto impl_x_off = x_off.create_impl_operand(_impl.get());
-    auto impl_y_off = y_off.create_impl_operand(_impl.get());
-
-    auto impl_dst = tile.create_impl_operand(_impl.get());
-
-    _impl->util_get_indirect_buffer(impl_dst, impl_tensor, impl_x, impl_y, impl_x_off, impl_y_off);
-}
-
-void KernelWriter::op_store(TensorOperand &tensor, const TileOperand &tile, const TensorTileSampler &sampler)
-{
-    prototype::TensorOperand impl_tensor(
-        tensor.name(),
-        prototype::GpuSampler{sampler.format(), prototype::to_gpu_tensor_storage(tensor.storage_type()),
-                              sampler.address_mode_x(), sampler.address_mode_y(), sampler.address_mode_z()});
-    auto impl_src = tile.create_impl_operand(_impl.get());
-    auto impl_x   = sampler.x().create_impl_operand(_impl.get());
-    auto impl_y   = sampler.y().create_impl_operand(_impl.get());
-    auto impl_z   = sampler.z().create_impl_operand(_impl.get());
-    auto impl_b   = sampler.b().create_impl_operand(_impl.get());
-
-    _impl->op_store_immediate(impl_tensor, impl_src, impl_x, impl_y, impl_z, impl_b);
-}
-
-// =================================================================================================
-// Data processing
-// =================================================================================================
-
-void KernelWriter::op_assign(const TileOperand &dst, const TileOperand &src)
-{
-    auto impl_dst = dst.create_impl_operand(_impl.get());
-    auto impl_src = src.create_impl_operand(_impl.get());
-
-    _impl->op_assign(impl_dst, impl_src);
-}
-
-void KernelWriter::op_cast_expression(const TileOperand &dst, const TileOperand &src, const ConvertPolicy policy)
-{
-    auto impl_dst = dst.create_impl_operand(_impl.get());
-    auto impl_src = src.create_impl_operand(_impl.get());
-
-    _impl->op_cast_expression(impl_dst, impl_src, policy);
-}
-
-void KernelWriter::op_binary_expression(const TileOperand &dst,
-                                        const TileOperand &lhs,
-                                        BinaryOp           op,
-                                        const TileOperand &rhs)
-{
-    auto impl_lhs = lhs.create_impl_operand(_impl.get());
-    auto impl_rhs = rhs.create_impl_operand(_impl.get());
-    auto impl_dst = dst.create_impl_operand(_impl.get());
-
-    _impl->op_binary_expression(impl_dst, impl_lhs, op, impl_rhs);
-}
-
-void KernelWriter::op_unary_expression(const TileOperand &dst, UnaryOp op, const TileOperand &src)
-{
-    auto impl_dst = dst.create_impl_operand(_impl.get());
-    auto impl_src = src.create_impl_operand(_impl.get());
-
-    _impl->op_unary_expression(impl_dst, op, impl_src);
-}
-
-void KernelWriter::op_unary_elementwise_function(const TileOperand &dst, UnaryFunction opcode, const TileOperand &src)
-{
-    auto impl_dst = dst.create_impl_operand(_impl.get());
-    auto impl_src = src.create_impl_operand(_impl.get());
-
-    _impl->op_unary_elementwise_function(impl_dst, opcode, impl_src);
-}
-
-void KernelWriter::op_binary_elementwise_function(const TileOperand &dst,
-                                                  BinaryFunction     opcode,
-                                                  const TileOperand &first,
-                                                  const TileOperand &second)
-{
-    auto impl_dst    = dst.create_impl_operand(_impl.get());
-    auto impl_first  = first.create_impl_operand(_impl.get());
-    auto impl_second = second.create_impl_operand(_impl.get());
-
-    _impl->op_binary_elementwise_function(impl_dst, opcode, impl_first, impl_second);
-}
-
-void KernelWriter::op_ternary_elementwise_function(const TileOperand &dst,
-                                                   TernaryFunction    opcode,
-                                                   const TileOperand &first,
-                                                   const TileOperand &second,
-                                                   const TileOperand &third)
-{
-    auto impl_dst    = dst.create_impl_operand(_impl.get());
-    auto impl_first  = first.create_impl_operand(_impl.get());
-    auto impl_second = second.create_impl_operand(_impl.get());
-    auto impl_third  = third.create_impl_operand(_impl.get());
-
-    _impl->op_ternary_elementwise_function(impl_dst, opcode, impl_first, impl_second, impl_third);
-}
-
-void KernelWriter::op_if(const TileOperand &lhs, BinaryOp op, const TileOperand &rhs, const std::function<void()> &body)
-{
-    auto impl_lhs = lhs.create_impl_operand(_impl.get());
-    auto impl_rhs = rhs.create_impl_operand(_impl.get());
-
-    _impl->op_if_header(impl_lhs, op, impl_rhs);
-    _impl->compound_statement_begin();
-    body();
-    _impl->compound_statement_end();
-}
-
-void KernelWriter::op_else_if(const TileOperand           &lhs,
-                              BinaryOp                     op,
-                              const TileOperand           &rhs,
-                              const std::function<void()> &body)
-{
-    auto impl_lhs = lhs.create_impl_operand(_impl.get());
-    auto impl_rhs = rhs.create_impl_operand(_impl.get());
-
-    _impl->op_else_if_header(impl_lhs, op, impl_rhs);
-    _impl->compound_statement_begin();
-    body();
-    _impl->compound_statement_end();
-}
-
-void KernelWriter::op_else(const std::function<void()> &body)
-{
-    _impl->op_else_header();
-    _impl->compound_statement_begin();
-    body();
-    _impl->compound_statement_end();
-}
-
-void KernelWriter::op_for_loop(const TileOperand           &var_name,
-                               BinaryOp                     cond_op,
-                               const TileOperand           &cond_value_name,
-                               const TileOperand           &update_var_name,
-                               AssignmentOp                 update_op,
-                               const TileOperand           &update_value_name,
-                               const std::function<void()> &body)
-{
-    auto impl_var_name          = var_name.create_impl_operand(_impl.get());
-    auto impl_cond_value_name   = cond_value_name.create_impl_operand(_impl.get());
-    auto impl_update_var_name   = update_var_name.create_impl_operand(_impl.get());
-    auto impl_update_value_name = update_value_name.create_impl_operand(_impl.get());
-
-    _impl->op_for_loop_header(impl_var_name, cond_op, impl_cond_value_name, impl_update_var_name, update_op,
-                              impl_update_value_name);
-    _impl->compound_statement_begin();
-    body();
-    _impl->compound_statement_end();
-}
-
-// =================================================================================================
-// Misc
-// =================================================================================================
-
-void KernelWriter::op_get_global_id(const TileOperand &dst, int32_t dim)
-{
-    _impl->op_get_global_id(prototype::Operand(dst.name()), dim);
-}
-
-void KernelWriter::op_return()
-{
-    _impl->op_return();
-}
-
-// =================================================================================================
-// Code generation
-// =================================================================================================
-
-std::string KernelWriter::generate_code()
-{
-    return prototype::generate_code(*_kernel->impl(), _kernel->name());
-}
-
-} // namespace ckw
diff --git a/compute_kernel_writer/prototype/src/OperandBase.cpp b/compute_kernel_writer/prototype/src/OperandBase.cpp
deleted file mode 100644
index e0617fdc06..0000000000
--- a/compute_kernel_writer/prototype/src/OperandBase.cpp
+++ /dev/null
@@ -1,49 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ckw/OperandBase.h"
-
-namespace ckw
-{
-
-OperandBase::OperandBase(const std::string &name) : _name(name)
-{
-}
-
-OperandBase::~OperandBase()
-{
-}
-
-const std::string &OperandBase::name() const
-{
-    return _name;
-}
-
-OperandBase &OperandBase::name(const std::string &name)
-{
-    _name = name;
-    return *this;
-}
-
-} // namespace ckw
diff --git a/compute_kernel_writer/prototype/src/Prototype.h b/compute_kernel_writer/prototype/src/Prototype.h
deleted file mode 100644
index b392fe2651..0000000000
--- a/compute_kernel_writer/prototype/src/Prototype.h
+++ /dev/null
@@ -1,4189 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#ifndef CKW_PROTOTYPE_SRC_PROTOTYPE_H
-#define CKW_PROTOTYPE_SRC_PROTOTYPE_H
-
-#include "ckw/Error.h"
-#include "ckw/TensorInfo.h"
-#include "ckw/types/ConvertPolicy.h"
-#include "ckw/types/DataType.h"
-#include "ckw/types/Functions.h"
-#include "ckw/types/GpuTargetLanguage.h"
-#include "ckw/types/Operators.h"
-#include "ckw/types/TensorSamplerTypes.h"
-
-#include <algorithm>
-#include <array>
-#include <cassert> // assert (to be removed)
-#include <chrono>
-#include <cmath>
-#include <cstdint> // int32_t
-#include <functional>
-#include <iostream> // cout (to be removed)
-#include <map>
-#include <memory>
-#include <stdexcept>
-#include <string>
-#include <unordered_map>
-#include <vector>
-
-namespace ckw
-{
-namespace prototype
-{
-
-// Dummy data structure for Size2D
-using Size2D = std::vector<int32_t>;
-
-// Dummy Status
-using Status = void;
-
-enum class ComponentType : int32_t
-{
-    Complex   = 0,
-    Simple    = 1,
-    Unfusable = 2
-};
-
-enum class GpuCompilationSpeed
-{
-    Fast = 0x00, // fast compilation may increase the latency of the network
-    Slow = 0x01  // slow compilation may decrease the latency of the network
-};
-
-enum class GpuExtensions
-{
-    Fp16,
-    Dot8,
-    Mmul,
-    FastMath
-};
-
-struct TensorInfo
-{
-    TensorShape      shape{{0}};
-    DataType         data_type{DataType::Unknown};
-    TensorDataLayout data_layout{TensorDataLayout::Nhwc};
-    int32_t          id{-1};
-};
-
-struct ComponentAttribute
-{
-    GpuCompilationSpeed compilation_speed{GpuCompilationSpeed::Fast};
-    bool                overwrite_tile{true};
-};
-
-inline std::string data_type_to_cl_type(DataType dt)
-{
-    switch (dt)
-    {
-        case DataType::Fp32:
-            return "float";
-        case DataType::Fp16:
-            return "half";
-        case DataType::Int8:
-            return "char";
-        case DataType::Uint8:
-            return "uchar";
-        case DataType::Uint16:
-            return "ushort";
-        case DataType::Int16:
-            return "short";
-        case DataType::Uint32:
-            return "uint";
-        case DataType::Int32:
-            return "int";
-        case DataType::Bool:
-            return "bool";
-        default:
-            assert(false);
-            return "";
-    }
-}
-
-inline int32_t width_to_cl_vector_size(int32_t width)
-{
-    switch (width)
-    {
-        case 1:
-            return 1;
-        case 2:
-            return 2;
-        case 3:
-            return 3;
-        case 4:
-            return 4;
-        case 5:
-        case 6:
-        case 7:
-        case 8:
-            return 8;
-        case 9:
-        case 10:
-        case 11:
-        case 12:
-        case 13:
-        case 14:
-        case 15:
-        case 16:
-            return 16;
-        default:
-            assert(false);
-            return 0;
-    }
-}
-
-inline std::string get_cl_data_type(DataType dt, int32_t width)
-{
-    std::string data_type;
-    int32_t     w = width_to_cl_vector_size(width);
-    data_type += data_type_to_cl_type(dt);
-    if (w != 1)
-    {
-        data_type += std::to_string(w);
-    }
-    return data_type;
-}
-
-inline std::string to_opencl_store(int32_t vector_length)
-{
-    if (vector_length != 1)
-    {
-        return "vstore" + std::to_string(vector_length) + "(";
-    }
-    else
-    {
-        return "*(";
-    }
-}
-
-struct TileInfo
-{
-    TileInfo()
-    {
-    }
-
-    TileInfo(DataType dt) : dt(dt), w(1), h(1)
-    {
-    }
-
-    TileInfo(DataType dt, int32_t width) : dt(dt), w(width), h(1)
-    {
-    }
-
-    TileInfo(DataType dt, int32_t width, int32_t height) : dt(dt), w(width), h(height)
-    {
-    }
-
-    DataType dt{DataType::Unknown}; // Data type of the tile
-    int32_t  w{0};                  // Width (i.e. c0 - portion of the channels)
-    int32_t  h{0};                  // Height (i.e. s0 - portion of the spatial dimensions)
-};
-
-inline std::ostream &operator<<(std::ostream &o, const TileInfo &a)
-{
-    o << a.w << " x " << a.h;
-    return o;
-}
-
-struct DataTypeAsString
-{
-    std::string str{""};
-    DataType    dt{DataType::Unknown};
-    int32_t     size{1};
-};
-
-struct ValueAsString
-{
-    std::string      str{""};
-    DataTypeAsString type{};
-};
-
-// https://stackoverflow.com/questions/51515378/storing-and-accessing-tile-properties-in-c
-// A Tile is a collection of variables used to express a 2D data.
-class IScalarTile
-{
-public:
-    virtual ~IScalarTile() = default;
-
-    /** Method to get the scalar variable from a tile
-     * @param[in] x X coordinate on the width of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge
-     * @param[in] y Y coordinate on the height of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge
-     *
-     * @return the scalar variable as a string
-     */
-    virtual ValueAsString scalar(int32_t x, int32_t y) const = 0;
-
-    /** Method to get the list of underlying variable names used by the tile
-     *
-     * @return the list of variable names
-     */
-    virtual std::vector<ValueAsString> underlying_source_variables() const = 0;
-
-    /** Method to get the name of the tile.
-     *
-     * @return the name of the tile
-     */
-    std::string name() const
-    {
-        return _basename;
-    }
-
-    /** Method to get the tile format
-     *
-     * @return the format
-     */
-    TileInfo format() const
-    {
-        return _format;
-    }
-
-    /** Method to know whether the tile is assignable or not (constant)
-     *
-     * @return true if the tile is assignable
-     */
-    virtual bool is_assignable() const = 0;
-
-    /** Method to know whether the tile needs to be declared
-     *
-     * @return true if the tile needs to be declared in the code before being used
-     */
-    virtual bool need_declaration() const = 0;
-
-protected:
-    TileInfo    _format{};     // Tile format
-    std::string _basename{""}; // Tile name
-};
-
-// A tile is a collection of variables used to express a 2D data. The variables are vectors in the GPU context.
-// The vector size is given by the width of the tile. The number of vectors height by depth defines the number of vectors
-class IVectorTile : public IScalarTile
-{
-public:
-    virtual ~IVectorTile() = default;
-
-    /** Method to get the vector variable from a tile. A vector is an ordered homogeneous collection of two or more scalars.
-     *  The user can query the list of supported width for the vectors through preferred_vector_sizes().
-     *
-     * @param[in] y Y coordinate on the height of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge
-     *
-     * @return the vector variable as a string
-     */
-    virtual ValueAsString vector(int32_t y) const = 0;
-
-    /** Method to get a vector variable from a tile. A vector is an ordered homogeneous collection of two or more scalars.
-     *
-     * @return the vector variable as a string
-     */
-    virtual ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const = 0;
-    /** Method to get the preferred vector sizes.
-     *
-     * @return a vector with the preferred vector sizes
-     */
-    //virtual std::vector<int32_t> preferred_vector_sizes() const = 0;
-};
-
-class ClTile : public IVectorTile
-{
-public:
-    ClTile(const std::string &name, TileInfo format)
-    {
-        _format   = format;
-        _basename = name;
-    }
-
-    ValueAsString scalar(int32_t x, int32_t y) const override
-    {
-        x = std::max(std::min(x, _format.w - 1), static_cast<int32_t>(0));
-        y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
-
-        ValueAsString t;
-        t.str       = build_variable_name(y);
-        t.type.str  = get_cl_data_type(_format.dt, 1);
-        t.type.dt   = _format.dt;
-        t.type.size = 1;
-
-        // Check required because if the width has only one element, we cannot use .s0
-        if (_format.w != 1)
-        {
-            // Automatic broadcasting
-            t.str += ".s" + std::to_string(x);
-        }
-
-        return t;
-    }
-
-    ValueAsString vector(int32_t y) const override
-    {
-        y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
-
-        ValueAsString t;
-        t.str       = build_variable_name(y);
-        t.type.str  = get_cl_data_type(_format.dt, _format.w);
-        t.type.dt   = _format.dt;
-        t.type.size = _format.w;
-        return t;
-    }
-
-    ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const override
-    {
-        y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
-
-        ValueAsString t;
-        t.str       = build_variable_name(y);
-        t.type.str  = get_cl_data_type(_format.dt, width);
-        t.type.dt   = _format.dt;
-        t.type.size = width;
-
-        if (_format.w != 1)
-        {
-            t.str += ".s";
-            for (int i = 0; i < width; ++i)
-            {
-                t.str += to_scalar_hex(x_start + i);
-            }
-        }
-        return t;
-    }
-
-    std::vector<ValueAsString> underlying_source_variables() const override
-    {
-        std::vector<ValueAsString> vars;
-        for (int32_t y = 0; y < _format.h; ++y)
-        {
-            ValueAsString t;
-            t.str       = build_variable_name(y);
-            t.type.str  = get_cl_data_type(_format.dt, _format.w);
-            t.type.dt   = _format.dt;
-            t.type.size = _format.w;
-            vars.push_back(t);
-        }
-        return vars;
-    }
-
-    bool is_assignable() const override
-    {
-        return true;
-    }
-
-    bool need_declaration() const override
-    {
-        return true;
-    }
-
-private:
-    std::string build_variable_name(int32_t y) const
-    {
-        std::string var_name = _basename;
-
-        if (_format.h == 1)
-        {
-            return var_name;
-        }
-        else
-        {
-            var_name += "_";
-            var_name += std::to_string(y);
-        }
-
-        return var_name;
-    }
-
-    std::string to_scalar_hex(int32_t x) const
-    {
-        switch (x)
-        {
-            case 0:
-            case 1:
-            case 2:
-            case 3:
-            case 4:
-            case 5:
-            case 6:
-            case 7:
-            case 8:
-            case 9:
-                return std::to_string(x);
-            case 10:
-                return "A";
-            case 11:
-                return "B";
-            case 12:
-                return "C";
-            case 13:
-                return "D";
-            case 14:
-                return "E";
-            case 15:
-                return "F";
-            default:
-                std::cout << "Unsupported hexadecimal value" << std::endl;
-                assert(false);
-                return "";
-        }
-    }
-};
-
-// Unique features: It contains values in the form of string. The name used for this object is misleading since the variables can change the value over time.
-class ClConstantTile : public IVectorTile
-{
-public:
-    ClConstantTile(const std::vector<std::vector<std::string>> &in, DataType dt)
-    {
-        _format.w  = in[0].size();
-        _format.h  = in.size();
-        _format.dt = dt;
-
-        _data = std::vector<std::vector<std::string>>(_format.h, std::vector<std::string>(_format.w));
-
-        for (int32_t y = 0; y < _format.h; ++y)
-        {
-            for (int32_t x = 0; x < _format.w; ++x)
-            {
-                _data[y][x] = in[y][x];
-            }
-        }
-    }
-
-    ValueAsString scalar(int32_t x, int32_t y) const override
-    {
-        x = std::max(std::min(x, _format.w - 1), static_cast<int32_t>(0));
-        y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
-
-        ValueAsString t;
-        t.str       = _data[y][x];
-        t.type.str  = get_cl_data_type(_format.dt, 1);
-        t.type.dt   = _format.dt;
-        t.type.size = 1;
-
-        return t;
-    }
-
-    ValueAsString vector(int32_t y) const override
-    {
-        y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
-
-        return vector(0, _format.w, y);
-    }
-
-    ValueAsString vector(int32_t x_start, int32_t width, int32_t y) const override
-    {
-        y = std::max(std::min(y, _format.h - 1), static_cast<int32_t>(0));
-
-        ValueAsString t;
-        t.str       = "";
-        t.type.str  = get_cl_data_type(_format.dt, width);
-        t.type.dt   = _format.dt;
-        t.type.size = width;
-
-        if (width > 1)
-        {
-            t.str += "((" + get_cl_data_type(_format.dt, width) + ")(";
-        }
-
-        int32_t x = x_start;
-        for (; x < width - 1; ++x)
-        {
-            t.str += scalar(x, y).str;
-            t.str += ", ";
-        }
-        t.str += scalar(x, y).str;
-
-        if (width > 1)
-        {
-            t.str += "))";
-        }
-
-        return t;
-    }
-
-    std::vector<ValueAsString> underlying_source_variables() const override
-    {
-        std::vector<ValueAsString> vars;
-
-        for (int32_t y = 0; y < _format.h; ++y)
-        {
-            for (int32_t x = 0; x < _format.w; ++x)
-            {
-                ValueAsString t;
-                t.str       = _data[y][x];
-                t.type.str  = get_cl_data_type(_format.dt, 1);
-                t.type.dt   = _format.dt;
-                t.type.size = 1;
-                vars.push_back(t);
-            }
-        }
-
-        return vars;
-    }
-
-    bool is_assignable() const override
-    {
-        return false;
-    }
-
-    bool need_declaration() const override
-    {
-        return false;
-    }
-
-private:
-    std::vector<std::vector<std::string>> _data{};
-};
-
-enum class TensorComponentIndex : int32_t
-{
-    IndexMask = 0x0000000f,
-};
-
-enum class TensorComponentGroup : int32_t
-{
-    OffsetFirstElement = 0x00000100,
-    Stride             = 0x00001000,
-    Dimension          = 0x00010000,
-    FoldedDimension    = 0x00100000,
-    Constant           = 0x01000000
-};
-
-inline std::string to_string(TensorComponentType x)
-{
-    switch (x)
-    {
-        case TensorComponentType::Unknown:
-            return "Unknown";
-        case TensorComponentType::OffsetFirstElement:
-            return "OffsetFirstElement";
-        case TensorComponentType::Stride1:
-            return "Stride1";
-        case TensorComponentType::Stride2:
-            return "Stride2";
-        case TensorComponentType::Stride3:
-            return "Stride3";
-        case TensorComponentType::Stride4:
-            return "Stride4";
-        case TensorComponentType::Dim0:
-            return "Dim0";
-        case TensorComponentType::Dim1:
-            return "Dim1";
-        case TensorComponentType::Dim2:
-            return "Dim2";
-        case TensorComponentType::Dim3:
-            return "Dim3";
-        case TensorComponentType::Dim4:
-            return "Dim4";
-        case TensorComponentType::Dim1xDim2:
-            return "Dim1xDim2";
-        case TensorComponentType::Dim1xDim2xDim3:
-            return "Dim1xDim2xDim3";
-        default:
-            assert(false);
-            return "";
-    }
-}
-
-class ITensorArgument
-{
-public:
-    virtual ~ITensorArgument() = default;
-
-    /** Method to get the tensor component as a string
-     *
-     * @param[in] x tensor component to query
-     *
-     * @return  the tensor component as a string
-     */
-    virtual std::string component(TensorComponentType x) = 0;
-
-    /** Method to get the tensor component type declaration as a string
-     *
-     * @return  the tensor component type declaration as a string
-     */
-    virtual std::string component_type_declaration() const = 0;
-
-    /** Method to get the tensor component data type
-     *
-     * @return  the tensor component data type
-     */
-    virtual DataType component_data_type() const = 0;
-
-    /** Method to get the tensor component declarations
-     *
-     * @return a vector containing the tensor component declarations
-     */
-    virtual std::vector<TensorComponentType> component_declarations() const = 0;
-
-    /** Method to get the name of the tensor argument.
-     *
-     * @return the name of the tensor argument
-     */
-    std::string name() const
-    {
-        return _basename;
-    }
-
-    /** Method to get the tensor format
-     *
-     * @return the format
-     */
-    TensorInfo format() const
-    {
-        return _format;
-    }
-
-protected:
-    TensorInfo  _format{};
-    std::string _basename{};
-};
-
-enum class GpuTensorStorage : int32_t
-{
-    Unknown          = 0x0000,
-    BufferUint8Ptr   = 0x0012,
-    Image2dReadOnly  = 0x0020,
-    Image2dWriteOnly = 0x0021,
-    Image3dReadOnly  = 0x0030,
-    Image3dWriteOnly = 0x0031
-};
-
-inline GpuTensorStorage to_gpu_tensor_storage(TensorStorageType s)
-{
-    switch (s)
-    {
-        case TensorStorageType::Unknown:
-            return GpuTensorStorage::Unknown;
-
-        case TensorStorageType::BufferUint8Ptr:
-            return GpuTensorStorage::BufferUint8Ptr;
-
-        case TensorStorageType::Texture2dReadOnly:
-            return GpuTensorStorage::Image2dReadOnly;
-
-        case TensorStorageType::Texture2dWriteOnly:
-            return GpuTensorStorage::Image2dWriteOnly;
-
-        default:
-            assert(false);
-            return GpuTensorStorage::Unknown;
-    }
-}
-
-inline TensorStorageType to_tensor_storage(GpuTensorStorage s)
-{
-    switch (s)
-    {
-        case GpuTensorStorage::Unknown:
-            return TensorStorageType::Unknown;
-
-        case GpuTensorStorage::BufferUint8Ptr:
-            return TensorStorageType::BufferUint8Ptr;
-
-        case GpuTensorStorage::Image2dReadOnly:
-            return TensorStorageType::Texture2dReadOnly;
-
-        case GpuTensorStorage::Image2dWriteOnly:
-            return TensorStorageType::Texture2dWriteOnly;
-
-        default:
-            assert(false);
-            return TensorStorageType::Unknown;
-    }
-}
-
-class IGpuTensorArgument : public ITensorArgument
-{
-public:
-    virtual ~IGpuTensorArgument() = default;
-
-    /** Method to get the tensor storage, which is the underlying storage used to keep the data memory
-     *
-     * @param[in] x tensor storage to query
-     *
-     * @return  the tensor storage as a string
-     */
-    virtual std::string storage(GpuTensorStorage x) = 0;
-
-    /** Method to get the tensor storage type declaration as a string
-     *
-     * @param[in] x tensor component to query
-     *
-     * @return  the tensor storage type declaration as a string
-     */
-    virtual std::string storage_type_declaration(GpuTensorStorage x) const = 0;
-
-    /** Method to get the tensor storage declarations
-     *
-     * @return a vector containing the tensor storage declarations
-     */
-    virtual std::vector<GpuTensorStorage> storage_declarations() const = 0;
-};
-
-class ClTensorArgument : public IGpuTensorArgument
-{
-public:
-    ClTensorArgument(const std::string &name, const TensorInfo &x, bool return_by_value_when_possible)
-    {
-        _basename                      = name;
-        _format                        = x;
-        _return_by_value_when_possible = return_by_value_when_possible;
-    }
-
-    // Methods to override
-    std::string component(TensorComponentType x) override
-    {
-        if ((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentGroup::Constant)))
-        {
-            int32_t idx = static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentIndex::IndexMask);
-            return std::to_string(idx - 1);
-        }
-
-        if (_return_by_value_when_possible)
-        {
-            if ((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentGroup::Dimension)))
-            {
-                int32_t idx = static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentIndex::IndexMask);
-                return std::to_string(_format.shape[idx]);
-            }
-
-            if ((static_cast<int32_t>(x) & static_cast<int32_t>(TensorComponentGroup::FoldedDimension)))
-            {
-                switch (x)
-                {
-                    case TensorComponentType::Dim1xDim2:
-                        return std::to_string(_format.shape[1] * _format.shape[2]);
-                    case TensorComponentType::Dim1xDim2xDim3:
-                        return std::to_string(_format.shape[1] * _format.shape[2] * _format.shape[2]);
-                    default:
-                        std::cout << "Unsupported folded dimension" << std::endl;
-                        assert(false);
-                }
-            }
-        }
-
-        if (std::find(_components_required.begin(), _components_required.end(), x) == _components_required.end())
-        {
-            _components_required.push_back(x);
-        }
-
-        return build_component_name(x);
-    }
-
-    std::string component_type_declaration() const override
-    {
-        return "int";
-    };
-
-    DataType component_data_type() const override
-    {
-        return DataType::Int32;
-    }
-
-    std::string storage(GpuTensorStorage x) override
-    {
-        if (std::find(_storage_required.begin(), _storage_required.end(), x) == _storage_required.end())
-        {
-            _storage_required.push_back(x);
-        }
-
-        return build_storage_name(x);
-    }
-
-    std::string storage_type_declaration(GpuTensorStorage x) const override
-    {
-        switch (x)
-        {
-            case GpuTensorStorage::BufferUint8Ptr:
-                return "__global uchar*";
-            case GpuTensorStorage::Image2dReadOnly:
-                return "__read_only image2d_t";
-            case GpuTensorStorage::Image2dWriteOnly:
-                return "__write_only image2d_t";
-            case GpuTensorStorage::Image3dReadOnly:
-                return "__read_only image3d_t ";
-            case GpuTensorStorage::Image3dWriteOnly:
-                return "__write_only image3d_t ";
-            default:
-                std::cout << "Unsupported storage" << std::endl;
-                assert(false);
-                return "";
-        }
-    };
-
-    std::vector<GpuTensorStorage> storage_declarations() const override
-    {
-        return _storage_required;
-    }
-
-    std::vector<TensorComponentType> component_declarations() const override
-    {
-        return _components_required;
-    }
-
-private:
-    std::string build_storage_name(GpuTensorStorage x) const
-    {
-        std::string var_name = _basename;
-
-        switch (x)
-        {
-            case GpuTensorStorage::BufferUint8Ptr:
-                return var_name + "_ptr";
-            case GpuTensorStorage::Image2dReadOnly:
-            case GpuTensorStorage::Image2dWriteOnly:
-                return var_name + "_img2d";
-            case GpuTensorStorage::Image3dReadOnly:
-            case GpuTensorStorage::Image3dWriteOnly:
-                return var_name + "_img3d";
-            default:
-                std::cout << "Unsupported storage" << std::endl;
-                assert(false);
-        }
-
-        return var_name;
-    }
-
-    std::string build_component_name(TensorComponentType x) const
-    {
-        std::string var_name = _basename;
-
-        switch (x)
-        {
-            case TensorComponentType::OffsetFirstElement:
-                return var_name + "_offset_first_element";
-            case TensorComponentType::Stride1:
-                return var_name + "_stride1";
-            case TensorComponentType::Stride2:
-                return var_name + "_stride2";
-            case TensorComponentType::Stride3:
-                return var_name + "_stride3";
-            case TensorComponentType::Dim0:
-                return var_name + "_dim0";
-            case TensorComponentType::Dim1:
-                return var_name + "_dim1";
-            case TensorComponentType::Dim2:
-                return var_name + "_dim2";
-            case TensorComponentType::Dim3:
-                return var_name + "_dim3";
-            case TensorComponentType::Dim1xDim2:
-                return var_name + "_dim1xdim2";
-            case TensorComponentType::Dim1xDim2xDim3:
-                return var_name + "_dim1xdim2xdim3";
-            default:
-                std::cout << "Unsupported component" << std::endl;
-                assert(false);
-        }
-
-        return var_name;
-    }
-
-    bool                             _return_by_value_when_possible{false};
-    std::vector<GpuTensorStorage>    _storage_required{};
-    std::vector<TensorComponentType> _components_required{};
-};
-
-/**
- * @brief Data structure that contains the declared tiles by the components.
- * The registry is a linear data structure that follows the similar principle of the stack. The user can use the @p increment_registry_level() method to
- * increase the level of the stack (0 when it starts). When the user uses the @p decrement_registry_level() method, the registry decreases the level of the stack
- * and remove (pop) all the tiles from the level above.
- * When a tile is declared on the level 0, it is a global tile. A global tile is visible in all parts of the code.
- * Since different components may use the same name to define a tile, the registry adopts the IdSpace concept, an @p id to prevent name collisions
- * when declaring tiles among different components.
- *
- */
-class GpuTileRegistry
-{
-public:
-    enum class RegistryTileType
-    {
-        Tile,
-        Link
-    };
-
-    using RegistryIdSpace  = int32_t;
-    using RegistryLevel    = int32_t;
-    using RegistryTileName = std::string;
-
-    struct RegistryTileTableEntry
-    {
-        RegistryLevel                registry_level{0};
-        std::unique_ptr<IVectorTile> tile_object{nullptr};
-    };
-
-    struct RegistryTileTypeTableEntry
-    {
-        RegistryTileType tile_type{RegistryTileType::Tile};
-        RegistryTileName tile_name{};
-        RegistryIdSpace  registry_idspace{0};
-        RegistryLevel    registry_level{0};
-    };
-
-    using RegistryTileTable     = std::map<RegistryIdSpace, std::map<RegistryTileName, RegistryTileTableEntry>>;
-    using RegistryTileTypeTable = std::map<RegistryIdSpace, std::map<RegistryTileName, RegistryTileTypeTableEntry>>;
-
-    /**
-     * @brief Construct a new Gpu Tile Registry object
-     *
-     */
-    GpuTileRegistry()
-    {
-        _language = GpuTargetLanguage::Unknown;
-    }
-
-    /**
-     * @brief Construct a new Gpu Tile Registry object providing the Gpu programming language
-     *
-     * @param[in] language Gpu programming language to use
-     */
-    GpuTileRegistry(GpuTargetLanguage language)
-    {
-        _language = language;
-    }
-
-    /**
-     * @brief Default destructor. Destroy the Gpu Tile Registry object
-     *
-     */
-    ~GpuTileRegistry() = default;
-
-    /**
-     * @brief Set the working IdSpace for the tile registry. IdSpace is used to prevent name collisions when declaring tiles.
-     *        Therefore, the IdSpace should be set before declaring any tiles.
-     *
-     * @param[in] id The IdSpace id
-     */
-    void set_IdSpace(int32_t id)
-    {
-        _IdSpace = id;
-    }
-
-    /**
-     * @brief Get the current working IdSpace for the tile registry. IdSpace is used to prevent name collisions when declaring tiles
-     *
-     * @return The IdSpace id
-     */
-    int32_t IdSpace() const
-    {
-        return _IdSpace;
-    }
-
-    /**
-     * @brief Gets all the IdSpace declarations defined in the tile registry.
-     *
-     * @return all the IdSpace declarations defined in the tile registry as std::vector<int32_t>. It returns an empty vector if there are no IdSpace declarations.
-     */
-    std::vector<int32_t> IdSpace_declarations() const
-    {
-        std::vector<int32_t> x;
-
-        auto it = _frags.begin();
-
-        while (it != _frags.end())
-        {
-            x.push_back(it->first);
-
-            it++;
-        }
-
-        return x;
-    }
-
-    /**
-     * @brief Declare a tile from a previously created tile
-     */
-    void insert(const std::string &name, const IVectorTile *frag)
-    {
-        assert(_language == GpuTargetLanguage::OpenCL);
-        const int32_t     key_IdSpace  = _IdSpace;
-        const std::string key_var_name = name;
-        const std::string var_name     = frag->name();
-        TileInfo          format       = frag->format();
-
-        // First check whether a tile with the same name exists
-        IVectorTile *result = (*this)[key_var_name];
-        assert(result == nullptr);
-        if (result == nullptr)
-        {
-            std::unique_ptr<ClTile> tile = std::make_unique<ClTile>(var_name, format);
-
-            _frags[key_IdSpace][key_var_name].tile_object    = std::move(tile);
-            _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
-
-            _frag_types[key_IdSpace][key_var_name].tile_type        = RegistryTileType::Link;
-            _frag_types[key_IdSpace][key_var_name].tile_name        = key_var_name;
-            _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
-            _frag_types[key_IdSpace][key_var_name].registry_level   = _registry_level;
-        }
-    }
-
-    /**
-     * @brief Declare a tile with TileInfo. The tile will be stored in the IdSpace set with @p set_IdSpace()
-     *
-     * @note The reference name used for declaring the tile should not be previously used in the IdSpace
-     *
-     * @param[in] name   Reference name for the tile. The reference name can be used to retrieve the tile stored in the registry.
-     * @param[in] format Tile format use to use
-     */
-    void insert(const std::string &name, const TileInfo &format)
-    {
-        assert(_language == GpuTargetLanguage::OpenCL);
-        const int32_t     key_IdSpace  = _IdSpace;
-        const std::string key_var_name = name;
-        const std::string var_name     = generate_tile_name(name);
-
-        // First check whether a tile with the same name exists
-        IVectorTile *result = (*this)[key_var_name];
-        assert(result == nullptr);
-        if (result == nullptr)
-        {
-            std::unique_ptr<ClTile> tile                     = std::make_unique<ClTile>(var_name, format);
-            _frags[key_IdSpace][key_var_name].tile_object    = std::move(tile);
-            _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
-
-            _frag_types[key_IdSpace][key_var_name].tile_type        = RegistryTileType::Tile;
-            _frag_types[key_IdSpace][key_var_name].tile_name        = key_var_name;
-            _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
-            _frag_types[key_IdSpace][key_var_name].registry_level   = _registry_level;
-        }
-    }
-
-    /**
-     * @brief Declare a constant tile. The content of the tile is passed as a vector of std::string
-     *
-     * @note The reference name used for declaring the tile should not be previously used in the IdSpace
-     *
-     * @param[in] name Reference name for the tile. The reference name can be used to retrieve the tile stored in the registry.
-     * @param[in] in   A 3D std::vector of std::string. From the 3D std::vector we can know the dimensions for the tile
-     * @param[in] dt   The data type for the elements stored in the 3D std::vector as std::string. It is user's responsibilty to ensure
-     *                 that the data type is aligned with the content of the std::string.
-     */
-    void insert(const std::string &name, const std::vector<std::vector<std::string>> &in, DataType dt)
-    {
-        assert(_language == GpuTargetLanguage::OpenCL);
-        const int32_t     key_IdSpace  = _IdSpace;
-        const std::string key_var_name = name;
-
-        // First check whether a tile with the same name exists
-        IVectorTile *result = (*this)[key_var_name];
-        assert(result == nullptr);
-        if (result == nullptr)
-        {
-            std::unique_ptr<ClConstantTile> tile             = std::make_unique<ClConstantTile>(in, dt);
-            _frags[key_IdSpace][key_var_name].tile_object    = std::move(tile);
-            _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
-
-            _frag_types[key_IdSpace][key_var_name].tile_type        = RegistryTileType::Tile;
-            _frag_types[key_IdSpace][key_var_name].tile_name        = key_var_name;
-            _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
-            _frag_types[key_IdSpace][key_var_name].registry_level   = _registry_level;
-        }
-    }
-
-    /**
-     * @brief Declare an anonymous constant tile. The content of the tile is passed as a vector of std::string
-     *
-     * @note This method can be used to declare temporary tiles that need to be accessed only once.
-     *
-     * @param[in] in   A 3D std::vector of std::string. From the 3D std::vector we can know the dimensions for the tile
-     * @param[in] dt   The data type for the elements stored in the 3D std::vector as std::string. It is user responsibilty to ensure
-     *                 that the data type is aligned with what passed with the std::string.
-     *
-     * @return IVectorTile* the anonymous constant tile
-     */
-    IVectorTile *insert(const std::vector<std::vector<std::string>> &in, DataType dt)
-    {
-        assert(_language == GpuTargetLanguage::OpenCL);
-        const int32_t     key_IdSpace  = _IdSpace;
-        const std::string key_var_name = "_" + std::to_string(_anonymous_frag_count++);
-
-        // First check whether a tile with the same name exists
-        IVectorTile *result = (*this)[key_var_name];
-        assert(result == nullptr);
-        if (result == nullptr)
-        {
-            std::unique_ptr<ClConstantTile> tile             = std::make_unique<ClConstantTile>(in, dt);
-            _frags[key_IdSpace][key_var_name].tile_object    = std::move(tile);
-            _frags[key_IdSpace][key_var_name].registry_level = _registry_level;
-
-            _frag_types[key_IdSpace][key_var_name].tile_type        = RegistryTileType::Tile;
-            _frag_types[key_IdSpace][key_var_name].tile_name        = key_var_name;
-            _frag_types[key_IdSpace][key_var_name].registry_idspace = _IdSpace;
-            _frag_types[key_IdSpace][key_var_name].registry_level   = _registry_level;
-        }
-
-        return (*this)[key_var_name];
-    }
-
-    /**
-     * @brief Get the tile from the registry. This method searches the tile in the IdSpace provided by the user
-     *
-     * @param[in] name         The name of the tile to retrieve
-     * @param[in] IdSpace The IdSpace id where to search the tile
-     *
-     * @return IVectorTile* The tile
-     */
-    IVectorTile *get(const std::string &name, int32_t IdSpace)
-    {
-        const int32_t     key_IdSpace  = IdSpace;
-        const std::string key_var_name = name;
-
-        IVectorTile *result         = nullptr;
-        auto         search_IdSpace = _frags.find(key_IdSpace);
-        if (search_IdSpace != _frags.end())
-        {
-            auto search_tile = _frags[key_IdSpace].find(key_var_name);
-            if (search_tile != _frags[key_IdSpace].end())
-            {
-                result = search_tile->second.tile_object.get();
-                assert(result != nullptr);
-            }
-        }
-
-        return result;
-    }
-
-    /**
-     * @brief Get the tile from the registry. This method searches the tile in the IdSpace set with @p set_IdSpace()
-     *
-     * @param[in] name The name of the tile to retrieve
-     *
-     * @return IVectorTile* The tile
-     */
-    IVectorTile *operator[](const std::string &name)
-    {
-        return get(name, _IdSpace);
-    }
-
-    /**
-     * @brief Check whether the tile in the in the IdSpace provided by the user exists
-     *
-     * @param[in] name         Name of the tile to search for
-     * @param[in] IdSpace The IdSpace id where to search the tile
-     *
-     * @return true if the tile exists
-     * @return false if the tile does not exist
-     */
-    bool has_tile(const std::string &name, int32_t IdSpace) const
-    {
-        const int32_t     key_IdSpace  = IdSpace;
-        const std::string key_var_name = name;
-
-        // IVectorTile* result = nullptr;
-        auto search_IdSpace = _frags.find(key_IdSpace);
-
-        return search_IdSpace != _frags.end();
-    }
-
-    /**
-     * @brief Check whether the tile within the current IdSpace exists
-     *
-     * @param[in] name Name of the tile to search for
-     *
-     * @return true if the tile exists
-     * @return false if the tile does not exist
-     */
-    bool has_tile(const std::string &name) const
-    {
-        return has_tile(name, _IdSpace);
-    }
-
-    /**
-     * @brief Get all the tiles declared within the IdSpace provided by the user
-     *
-     * @param[in] IdSpace IdSpace where to retrieve all the declared tiles
-     *
-     * @return std::vector<IVectorTile*> A vector with all the declared tiles in the IdSpace provided by the user
-     */
-    std::vector<IVectorTile *> tile_declarations(int32_t IdSpace)
-    {
-        std::vector<IVectorTile *> tiles;
-
-        std::map<RegistryTileName, RegistryTileTypeTableEntry>::iterator it = _frag_types[IdSpace].begin();
-
-        while (it != _frag_types[IdSpace].end())
-        {
-            // The following line should be enabled. However, we cannot at this stage
-            // because it used to retrieve the output tile produced by each component.
-            // However, this method should NOT be used to retrieve the output tile
-            //if(it->second.tile_type == RegistryTileType::Tile)
-            {
-                tiles.push_back(get(it->second.tile_name, it->second.registry_idspace));
-            }
-            it++;
-        }
-
-        return tiles;
-    }
-
-    /**
-     * @brief Increase the level of stack.
-     *
-     */
-    void increment_registry_level()
-    {
-        _registry_level++;
-    }
-
-    /**
-     * @brief Remove all the tiles declared at the current stack level and decrease the level of the stack.
-     *
-     */
-    void decrement_registry_level()
-    {
-        assert(_registry_level >= 0);
-
-        // Remove all variables in the local scope
-        std::map<RegistryTileName, RegistryTileTableEntry>::iterator it = _frags[_IdSpace].begin();
-
-        while (it != _frags[_IdSpace].end())
-        {
-            if (it->second.registry_level == _registry_level)
-            {
-                it = _frags[_IdSpace].erase(it);
-            }
-            else
-            {
-                it++;
-            }
-        }
-
-        std::map<RegistryTileName, RegistryTileTypeTableEntry>::iterator it_type = _frag_types[_IdSpace].begin();
-
-        while (it_type != _frag_types[_IdSpace].end())
-        {
-            if (it_type->second.registry_level == _registry_level)
-            {
-                it_type = _frag_types[_IdSpace].erase(it_type);
-            }
-            else
-            {
-                it_type++;
-            }
-        }
-
-        _registry_level--;
-    }
-
-    /**
-     * @brief Get the level of the stack
-     *
-     */
-    int32_t level() const
-    {
-        return _registry_level;
-    }
-
-private:
-    // This method ensures that the key is unique among different components
-    std::string generate_tile_name(const std::string &name)
-    {
-        assert(_IdSpace >= 0);
-        if (_registry_level == 0)
-        {
-            return "_G" + std::to_string(_IdSpace) + "_" + name;
-        }
-        else
-        {
-            return name;
-        }
-    }
-
-    RegistryTileTable     _frags{};
-    RegistryTileTypeTable _frag_types{};
-    RegistryLevel         _registry_level{0};
-    RegistryIdSpace       _IdSpace{-1};
-    int32_t               _anonymous_frag_count{0};              // Counter used to create the anonymous tiles
-    GpuTargetLanguage     _language{GpuTargetLanguage::Unknown}; // Gpu programming language
-};
-
-using TensorEntry = std::unique_ptr<IGpuTensorArgument>;
-
-/**
- * @brief Data structure that contains the tensors consumed by the components.
- * Since different components may use the same name as reference for a tensor, the registry adopts the IdSpace concept, an @p id to prevent name collisions
- * when declaring tensors among different components.
- *
- */
-class GpuTensorArgumentRegistry
-{
-public:
-    /**
-     * @brief Construct a new Gpu Tensor Registry object
-     *
-     */
-    GpuTensorArgumentRegistry()
-    {
-        _language = GpuTargetLanguage::Unknown;
-    }
-
-    /**
-     * @brief Construct a new Gpu Tensor Registry object
-     *
-     * @param[in] language Gpu programming language to use
-     */
-    GpuTensorArgumentRegistry(GpuTargetLanguage language)
-    {
-        _language = language;
-    }
-
-    /**
-     * @brief Default destructor. Destroy the Gpu Tensor Registry object
-     *
-     */
-    ~GpuTensorArgumentRegistry() = default;
-
-    /**
-     * @brief Set the working IdSpace for the tensor registry. IdSpace is used to prevent name collisions when declaring tensors.
-     *        Therefore, the IdSpace should be set before declaring any tensors.
-     *
-     * @param[in] id The IdSpace id
-     */
-    void set_IdSpace(int32_t id)
-    {
-        _IdSpace = id;
-    }
-
-    /**
-     * @brief Get the current working IdSpace for the tensor registry. IdSpace is used to prevent name collisions when declaring tensors
-     *
-     * @return The IdSpace id
-     */
-    int32_t IdSpace() const
-    {
-        return _IdSpace;
-    }
-
-    /**
-     * @brief Gets all the IdSpace declarations defined in the tensor registry.
-     *
-     * @return all the IdSpace declarations defined in the tensor registry as std::vector<int32_t>. It returns an empty vector if there are no IdSpace declarations.
-     */
-    std::vector<int32_t> IdSpace_declarations() const
-    {
-        std::vector<int32_t> x;
-
-        auto it = _refs.begin();
-
-        while (it != _refs.end())
-        {
-            x.push_back(it->first);
-
-            it++;
-        }
-
-        return x;
-    }
-
-    /**
-     * @brief Declare a tensor with TensorInfo. The tensor will be stored in the IdSpace set with @p set_IdSpace()
-     *
-     * @note The reference name used for declaring the tensor should not be previously used in the IdSpace
-     *
-     * @param[in] name                          Reference name for the tensor. The reference name can be used to retrieve the tensor stored in the registry.
-     * @param[in] x                             Pair of tensor info and tensor id
-     * @param[in] return_by_value_when_possible True if we want the value stored in the tensor components
-     */
-    void insert(const std::string &name, const TensorInfo &x, bool return_by_value_when_possible)
-    {
-        assert(_language == GpuTargetLanguage::OpenCL);
-        const int32_t     key_IdSpace  = _IdSpace;
-        const int32_t     tensor_id    = x.id;
-        const std::string key_var_name = name;
-        const std::string var_name     = generate_tensor_name(name, tensor_id);
-
-        // First, check whether the tensor has already a reference. If so, trigger an assert
-        assert(!has_tensor_argument(name));
-
-        // Check whether a tensor with that tensorID exists
-        auto result = _tensor_arguments.find(tensor_id);
-        if (result == _tensor_arguments.end())
-        {
-            // It means that we haven't added a tensor with that tensor_id yet. Create a IGpuTensorArgument before creating the reference
-            std::unique_ptr<ClTensorArgument> arg =
-                std::make_unique<ClTensorArgument>(var_name, x, return_by_value_when_possible);
-            _tensor_arguments[tensor_id] = std::move(arg);
-        }
-
-        _refs[key_IdSpace][key_var_name] = tensor_id;
-    }
-
-    /**
-     * @brief Get the tensor from the registry. This method searches the tensor in the IdSpace set with @p set_IdSpace()
-     *
-     * @param[in] name The name of the tensor to retrieve
-     *
-     * @return IGpuTensor* The tensor
-     */
-    IGpuTensorArgument *operator[](const std::string &name)
-    {
-        const int32_t     key_IdSpace  = _IdSpace;
-        const std::string key_var_name = name;
-
-        IGpuTensorArgument *result         = nullptr;
-        auto                search_IdSpace = _refs.find(key_IdSpace);
-        if (search_IdSpace != _refs.end())
-        {
-            auto search_tensor_id = _refs[key_IdSpace].find(key_var_name);
-
-            if (search_tensor_id != _refs[key_IdSpace].end())
-            {
-                const int32_t tensor_id              = search_tensor_id->second;
-                auto          search_tensor_argument = _tensor_arguments.find(tensor_id);
-                if (search_tensor_argument != _tensor_arguments.end())
-                {
-                    result = search_tensor_argument->second.get();
-                }
-                assert(result != nullptr);
-            }
-        }
-
-        return result;
-    }
-
-    /**
-     * @brief Get all the tensors declared in the IdSpace provided by the user
-     *
-     * @return std::vector<IGpuTensorArgument*> A vector with all the declared tensors
-     */
-    std::vector<IGpuTensorArgument *> tensor_argument_declarations()
-    {
-        std::vector<IGpuTensorArgument *> args;
-
-        auto it = _tensor_arguments.begin();
-
-        while (it != _tensor_arguments.end())
-        {
-            args.push_back(it->second.get());
-            it++;
-        }
-
-        return args;
-    }
-
-    /**
-     * @brief Check whether the tensor argument in the IdSpace set with @p set_IdSpace() exists
-     *
-     * @param[in] name Name of the tensor argument to search for
-     *
-     * @return true if the tensor argument exists
-     * @return false if the tensor argument does not exist
-     */
-    bool has_tensor_argument(const std::string &name)
-    {
-        const int32_t     key_IdSpace  = _IdSpace;
-        const std::string key_var_name = name;
-
-        auto search_IdSpace = _refs.find(key_IdSpace);
-
-        if (search_IdSpace != _refs.end())
-        {
-            auto search_tensor_id = _refs[key_IdSpace].find(key_var_name);
-
-            return search_tensor_id != _refs[key_IdSpace].end();
-        }
-        else
-        {
-            return false;
-        }
-    }
-
-    /**
-     * @brief Check whether the tensor argument is in the the IdSpace provided by the user
-     *
-     * @param[in] name    Name of the tensor argument to search for
-     * @param[in] IdSpace The IdSpace id where to search the tensor argument
-     *
-     * @return true if the tile exists
-     * @return false if the tile does not exist
-     */
-    bool has_tensor_argument(const std::string &name, int32_t IdSpace)
-    {
-        const int32_t     key_IdSpace  = IdSpace;
-        const std::string key_var_name = name;
-
-        auto search_IdSpace = _refs.find(key_IdSpace);
-
-        if (search_IdSpace != _refs.end())
-        {
-            auto search_tensor_id = _refs[key_IdSpace].find(key_var_name);
-
-            return search_tensor_id != _refs[key_IdSpace].end();
-        }
-        else
-        {
-            return false;
-        }
-    }
-
-private:
-    // This method ensures that the key is unique among different components
-    std::string generate_tensor_name(const std::string &name, int32_t tensor_id)
-    {
-        assert(tensor_id >= 0);
-
-        return name + std::to_string(tensor_id);
-    }
-
-    std::map<int32_t, TensorEntry>                    _tensor_arguments{};
-    std::map<int32_t, std::map<std::string, int32_t>> _refs{};
-    int32_t                                           _IdSpace{-1};
-    GpuTargetLanguage                                 _language{GpuTargetLanguage::Unknown}; // Gpu programming language
-};
-
-enum class OpType : int32_t
-{
-    Elementwise = 0x0000,
-    Relational  = 0x1000,
-    Algebra     = 0x2000
-};
-
-inline std::string to_string(AssignmentOp op)
-{
-    switch (op)
-    {
-        case AssignmentOp::Decrement:
-            return "-=";
-        case AssignmentOp::Increment:
-            return "+=";
-        default:
-            assert(false);
-            return "";
-    }
-}
-
-inline std::string to_string(UnaryOp op)
-{
-    switch (op)
-    {
-        case UnaryOp::LogicalNot:
-            return "!";
-        case UnaryOp::BitwiseNot:
-            return "~";
-        case UnaryOp::Negate:
-            return "-";
-        default:
-            assert(false);
-            return "";
-    }
-}
-
-inline std::string to_string(BinaryOp op)
-{
-    switch (op)
-    {
-        case BinaryOp::Add:
-            return "+";
-        case BinaryOp::Sub:
-            return "-";
-        case BinaryOp::Mul:
-            return "*";
-        case BinaryOp::Div:
-            return "/";
-        case BinaryOp::Mod:
-            return "%";
-        case BinaryOp::Equal:
-            return "==";
-        case BinaryOp::Less:
-            return "<";
-        case BinaryOp::LessEqual:
-            return "<=";
-        case BinaryOp::Greater:
-            return ">";
-        case BinaryOp::GreaterEqual:
-            return ">=";
-        case BinaryOp::LogicalAnd:
-            return "&&";
-        case BinaryOp::LogicalOr:
-            return "||";
-        case BinaryOp::BitwiseXOR:
-            return "^";
-        default:
-            assert(false);
-            return "";
-    }
-}
-
-inline std::string binary_op_string(BinaryOp op)
-{
-    switch (op)
-    {
-        case BinaryOp::Add:
-            return "add";
-        case BinaryOp::Sub:
-            return "sub";
-        case BinaryOp::Mul:
-            return "mul";
-        case BinaryOp::Div:
-            return "div";
-        case BinaryOp::Mod:
-            return "mod";
-        case BinaryOp::Equal:
-            return "eq";
-        case BinaryOp::Less:
-            return "gt";
-        case BinaryOp::LessEqual:
-            return "gteq";
-        case BinaryOp::Greater:
-            return "lt";
-        case BinaryOp::GreaterEqual:
-            return "lte";
-        default:
-            assert(false);
-            return "";
-    }
-}
-
-enum class OperandType : int32_t
-{
-    Unknown              = 0x00000000,
-    ScalarFp32           = 0x00001011, // Immediate scalar tile
-    ScalarFp16           = 0x00001012, // Immediate scalar tile
-    ScalarInt32          = 0x00001021, // Immediate scalar tile
-    ScalarInt16          = 0x00001022, // Immediate scalar tile
-    ScalarInt8           = 0x00001024, // Immediate scalar tile
-    ScalarUInt32         = 0x00001031, // Immediate scalar tile
-    ScalarUInt16         = 0x00001032, // Immediate scalar tile
-    ScalarUInt8          = 0x00001034, // Immediate scalar tile
-    ScalarBool           = 0x00001041, // Immediate scalar tile
-    ScalarTile           = 0x00001050, // Scalar from a tile
-    Tile                 = 0x00010000, // Tile
-    TensorStride1        = 0x00100001, // Tensor component
-    TensorStride2        = 0x00100002, // Tensor component
-    TensorStride3        = 0x00100003, // Tensor component
-    TensorStride4        = 0x00100004, // Tensor component
-    TensorDim0           = 0x00100010, // Tensor component
-    TensorDim1           = 0x00100020, // Tensor component
-    TensorDim2           = 0x00100030, // Tensor component
-    TensorDim3           = 0x00100040, // Tensor component
-    TensorDim4           = 0x00100050, // Tensor component
-    TensorC              = 0x00100010, // Tensor component
-    TensorW              = 0x00100020, // Tensor component
-    TensorH              = 0x00100030, // Tensor component
-    TensorD              = 0x00100040, // Tensor component
-    TensorN              = 0x00100050, // Tensor component
-    TensorDim1xDim2      = 0x00100100, // Tensor component
-    TensorDim1xDim2xDim3 = 0x00100200, // Tensor component
-    TensorWxH            = 0x00100300, // Tensor component
-    TensorWxHxD          = 0x00100400, // Tensor component
-    TensorDataOffset     = 0x00100500, // Tensor component
-};
-
-struct ScalarTileCoord
-{
-    ScalarTileCoord()
-    {
-    }
-
-    ScalarTileCoord(int32_t x0, int32_t y0) : x(x0), y(y0)
-    {
-    }
-
-    int32_t x{-1};
-    int32_t y{-1};
-};
-
-/**
- * @brief Operand class. This object is used to pass the operands to the operations performed by the writer.
- * Operand can be of three types:
- * -# Scalar immediate: constant expression
- * -# Tile: A tile
- * -# Tensor component: A component (scalar) of a tensor
- *
- */
-class Operand
-{
-public:
-    Operand(const std::string &val)
-    {
-        _str  = val;
-        _type = OperandType::Tile;
-    }
-
-    Operand(const std::string &val, const ScalarTileCoord &coord)
-    {
-        _str   = val;
-        _type  = OperandType::ScalarTile;
-        _coord = coord;
-    }
-
-    Operand(const std::string &val, OperandType type)
-    {
-        _str  = val;
-        _type = type;
-    }
-
-    Operand(const Operand &t)
-    {
-        _str  = t.value();
-        _type = t.type();
-    }
-
-    Operand &operator=(const Operand &t)
-    {
-        _str   = t.value();
-        _type  = t.type();
-        _coord = t.scalar_tile_coordinate();
-        return *this;
-    }
-
-    std::string value() const
-    {
-        return _str;
-    }
-
-    OperandType type() const
-    {
-        return _type;
-    }
-
-    ScalarTileCoord scalar_tile_coordinate() const
-    {
-        return _coord;
-    }
-
-private:
-    std::string     _str{};
-    OperandType     _type{OperandType::Unknown};
-    ScalarTileCoord _coord{};
-};
-
-using GpuSamplerTensorStorage = GpuTensorStorage;
-
-struct GpuSampler
-{
-    GpuSampler() = default;
-
-    TensorSamplerFormat       format{TensorSamplerFormat::Unknown};
-    GpuSamplerTensorStorage   storage{GpuSamplerTensorStorage::Unknown};
-    TensorSamplerAddressModeX address_mode_x{TensorSamplerAddressModeX::Unknown};
-    TensorSamplerAddressModeY address_mode_y{TensorSamplerAddressModeY::Unknown};
-    TensorSamplerAddressModeZ address_mode_z{TensorSamplerAddressModeZ::Unknown};
-};
-
-inline GpuSampler create_simple_sampler(
-    const TensorInfo *tensor_info_id, GpuSampler sampler, int32_t step_x, int32_t step_y, int32_t step_z)
-{
-    CKW_UNUSED(step_x, step_y, step_z);
-
-    auto tensor = tensor_info_id->shape;
-
-    GpuSampler dst_sampler;
-    dst_sampler.format         = sampler.format;
-    dst_sampler.storage        = GpuSamplerTensorStorage::BufferUint8Ptr;
-    dst_sampler.address_mode_x = sampler.address_mode_x;
-    dst_sampler.address_mode_y = sampler.address_mode_y;
-    dst_sampler.address_mode_z = sampler.address_mode_z;
-
-    int32_t dim_x = 0;
-    int32_t dim_y = 0;
-    int32_t dim_z = 0;
-
-    switch (sampler.format)
-    {
-        case TensorSamplerFormat::C_W_H:
-            dim_x = tensor[0];
-            dim_y = tensor[1];
-            dim_z = tensor[2];
-            break;
-        case TensorSamplerFormat::C_WH_1:
-            dim_x = tensor[0];
-            dim_y = tensor[1] * tensor[2];
-            dim_z = 1;
-            break;
-        default:
-            std::cout << "Unsupported tensor format" << std::endl;
-            assert(false);
-            break;
-    }
-
-    if (dim_x == 1)
-    {
-        assert(step_x == 1);
-        dst_sampler.address_mode_x = TensorSamplerAddressModeX::None;
-    }
-
-    if (dim_y == 1)
-    {
-        assert(step_y == 1);
-        dst_sampler.address_mode_y = TensorSamplerAddressModeY::None;
-    }
-
-    if (dim_z == 1)
-    {
-        assert(step_z == 1);
-        dst_sampler.address_mode_z = TensorSamplerAddressModeZ::None;
-    }
-
-    return dst_sampler;
-}
-
-class GpuOutputSampler
-{
-public:
-    GpuOutputSampler() = default;
-
-    /**
-     * @brief Method used to initialize the GpuOutputSampler. The GpuOutputSampler can be initialized only once
-     *        by the root component. Once initialized, all simpler components will need to used this sampler
-     *        or a broadcasted version of it
-     *
-     * @param[in] sampler GpuSampler
-     * @param[in] step_x  Increment step in the X direction. Not necessarily it is the same of n0 of tile!
-     * @param[in] step_y  Increment step in the Y direction. Not necessarily it is the same of m0 of tile!
-     * @param[in] step_z  Increment step in the Z direction. Not necessarily it is the same of d0 of tile!
-     */
-    void initialize(const TensorInfo       *tensor_info_id,
-                    GpuSamplerTensorStorage tensor_storage,
-                    TensorSamplerFormat     tensor_format,
-                    int32_t                 step_x,
-                    int32_t                 step_y,
-                    int32_t                 step_z)
-    {
-        assert(_is_initialized == false);
-
-        _step_x         = step_x;
-        _step_y         = step_y;
-        _step_z         = step_z;
-        _tensor_info_id = tensor_info_id;
-        _sampler        = create_sampler(tensor_storage, tensor_format);
-        _is_initialized = true;
-    };
-
-    GpuSampler sampler() const
-    {
-        return _sampler;
-    };
-
-    int32_t step_x() const
-    {
-        return _step_x;
-    };
-
-    int32_t step_y() const
-    {
-        return _step_y;
-    };
-
-    int32_t step_z() const
-    {
-        return _step_z;
-    };
-
-private:
-    GpuSampler create_sampler(GpuSamplerTensorStorage tensor_storage, TensorSamplerFormat tensor_format)
-    {
-        // Output can only be in output mode
-        assert(tensor_storage != GpuSamplerTensorStorage::Image2dReadOnly);
-        assert(tensor_storage != GpuSamplerTensorStorage::Image3dReadOnly);
-
-        auto tensor = _tensor_info_id->shape;
-
-        GpuSampler sampler;
-        sampler.format         = tensor_format;
-        sampler.storage        = tensor_storage;
-        sampler.address_mode_x = TensorSamplerAddressModeX::None;
-        sampler.address_mode_y = TensorSamplerAddressModeY::None;
-        sampler.address_mode_z = TensorSamplerAddressModeZ::None;
-
-        // In the case of texture, we do not need any special checks at the border
-        if (tensor_storage == GpuSamplerTensorStorage::BufferUint8Ptr)
-        {
-            int32_t dim_x = 0;
-            int32_t dim_y = 0;
-            int32_t dim_z = 0;
-
-            switch (tensor_format)
-            {
-                case TensorSamplerFormat::C_W_H:
-                    dim_x = tensor[0];
-                    dim_y = tensor[1];
-                    dim_z = tensor[2];
-                    break;
-                case TensorSamplerFormat::C_WH_1:
-                    dim_x = tensor[0];
-                    dim_y = tensor[1] * tensor[2];
-                    dim_z = 1;
-                    break;
-                default:
-                    std::cout << "Unsupported tensor format" << std::endl;
-                    assert(false);
-                    break;
-            }
-
-            if ((dim_x % _step_x) != 0 && dim_x != 1)
-            {
-                sampler.address_mode_x = TensorSamplerAddressModeX::OverlappingMin;
-            }
-
-            if ((dim_y % _step_y) != 0 && dim_y != 1)
-            {
-                sampler.address_mode_y = TensorSamplerAddressModeY::ClampToMaxEdgeOnly;
-            }
-
-            if ((dim_z % _step_z) != 0 && dim_z != 1)
-            {
-                sampler.address_mode_z = TensorSamplerAddressModeZ::ClampToMaxEdgeOnly;
-            }
-        }
-
-        return sampler;
-    }
-
-    GpuSampler        _sampler{}; // GpuSampler
-    int32_t           _step_x{1};
-    int32_t           _step_y{1};
-    int32_t           _step_z{1};
-    const TensorInfo *_tensor_info_id{nullptr};
-    bool              _is_initialized{false};
-};
-
-/**
- * @brief Tensor operand class. This object is used to pass the operands as tensor to the operations performed by the writer.
- */
-class TensorOperand
-{
-public:
-    TensorOperand(const std::string &val, GpuSampler sampler) : _str(val), _sampler(sampler)
-    {
-    }
-
-    TensorOperand &operator=(const TensorOperand &t)
-    {
-        _str     = t.value();
-        _sampler = t.sampler();
-        return *this;
-    }
-
-    std::string value() const
-    {
-        return _str;
-    }
-
-    GpuSampler sampler() const
-    {
-        return _sampler;
-    }
-
-private:
-    std::string _str{};
-    GpuSampler  _sampler{};
-};
-
-/**
- * @brief Data structure that contains all the necessary information to write the Gpu kernel with the Gpu kernel Writer
- *        This data structure must be initialized before being passed to the Gpu Kernel Writer
- *
- */
-class GpuKernelWriterDataHolder
-{
-public:
-    /**
-     * @brief Construct a new Gpu Kernel Data object. In this phase, we should also store
-     *        the GPU target and target specific capabilities (extensions). For now, we just initialize the
-     *        programming language
-     *
-     * @param[in] language Gpu programming language to use
-     */
-    GpuKernelWriterDataHolder(GpuTargetLanguage language)
-        : tiles(language), arguments(language), code(""), _language(language)
-    {
-    }
-
-    /**
-     * @brief Get the Gpu programming language used
-     *
-     * @return GpuTargetLanguage the Gpu programming language
-     */
-    GpuTargetLanguage programming_language() const
-    {
-        return _language;
-    }
-
-    /**
-     * @brief @ref GpuTileRegistry
-     *
-     */
-    GpuTileRegistry tiles{};
-    /**
-     * @brief @ref GpuTensorArgumentRegistry
-     *
-     */
-    GpuTensorArgumentRegistry arguments{};
-    /**
-     * @brief @ref GpuOutputSampler.
-     *
-     */
-    GpuOutputSampler output_sampler{};
-    /**
-     * @brief Source code
-     *
-     */
-    std::string code{};
-
-    // GpuExtensionRegistry extensions{};
-private:
-    GpuTargetLanguage _language;
-};
-
-struct LWS
-{
-    int32_t x{1};
-    int32_t y{1};
-    int32_t z{1};
-};
-
-/**
- * @brief Utility class used to get the tile from the operand. If the operand is not a tile, @ref OperandUnpacker
- *        declare an anonymous tile in the tile registry.
- */
-class OperandUnpacker
-{
-public:
-    OperandUnpacker(GpuTileRegistry &tiles, GpuTensorArgumentRegistry &arguments) : _tiles(tiles), _arguments(arguments)
-    {
-        // Increase the level of the stack to allocate possible temporary tiles
-        _tiles.increment_registry_level();
-    };
-
-    ~OperandUnpacker()
-    {
-        // Decrease the level of the stack to deallocate any temporary tiles
-        _tiles.decrement_registry_level();
-    }
-
-    IVectorTile *unpack(const Operand &src)
-    {
-        // Get the tile
-        if (src.type() == OperandType::Tile)
-        {
-            assert(_tiles.has_tile(src.value()));
-            return _tiles[src.value()];
-        }
-        // Create an anonymous tile with a constant
-        else if (static_cast<int32_t>(src.type()) & 0x00001000)
-        {
-            if (src.type() == OperandType::ScalarTile)
-            {
-                ScalarTileCoord coord = src.scalar_tile_coordinate();
-                assert(_tiles.has_tile(src.value()));
-                assert(coord.x >= 0);
-                assert(coord.y >= 0);
-                auto val = _tiles[src.value()]->scalar(coord.x, coord.y);
-                return _tiles.insert({{{val.str}}}, val.type.dt);
-            }
-            else
-            {
-                return _tiles.insert({{{src.value()}}}, to_tile_data_type(src.type()));
-            }
-        }
-        // Create an anonymous tile with the tensor component
-        else
-        {
-            assert(_arguments.has_tensor_argument(src.value()));
-            auto              x   = _arguments[src.value()];
-            const std::string val = x->component(to_tensor_component(src.type()));
-            const DataType    dt  = x->component_data_type();
-            return _tiles.insert({{{val}}}, dt);
-        }
-    }
-
-private:
-    DataType to_tile_data_type(OperandType x)
-    {
-        return static_cast<DataType>(static_cast<int32_t>(x) & 0x00ff);
-    }
-
-    TensorComponentType to_tensor_component(OperandType x)
-    {
-        switch (x)
-        {
-            case OperandType::TensorDim0:
-                return TensorComponentType::Dim0;
-            case OperandType::TensorDim1:
-                return TensorComponentType::Dim1;
-            case OperandType::TensorDim2:
-                return TensorComponentType::Dim2;
-            case OperandType::TensorDim3:
-                return TensorComponentType::Dim3;
-            case OperandType::TensorDim4:
-                return TensorComponentType::Dim4;
-            case OperandType::TensorStride1:
-                return TensorComponentType::Stride1;
-            case OperandType::TensorStride2:
-                return TensorComponentType::Stride2;
-            case OperandType::TensorStride3:
-                return TensorComponentType::Stride3;
-            case OperandType::TensorStride4:
-                return TensorComponentType::Stride4;
-            case OperandType::TensorDim1xDim2:
-                return TensorComponentType::Dim1xDim2;
-            case OperandType::TensorDim1xDim2xDim3:
-                return TensorComponentType::Dim1xDim2xDim3;
-            case OperandType::TensorDataOffset:
-                return TensorComponentType::OffsetFirstElement;
-            default:
-                assert(false);
-                return TensorComponentType::Unknown;
-        }
-    }
-
-    GpuTileRegistry           &_tiles;
-    GpuTensorArgumentRegistry &_arguments;
-};
-
-/**
- * @brief Utility class used to get the tensor argument from the operand. If the operand is not a tile, @ref OperandUnpacker
- *        declare an anonymous tile in the tile registry.
- *        Tensor dimension reduction aims for reducing the tensor data dimension while keeping data's tensor structure.
- */
-class TensorOperandUnpacker
-{
-public:
-    TensorOperandUnpacker(GpuTensorArgumentRegistry &arguments) : _arguments(arguments){};
-
-    IGpuTensorArgument *unpack(const TensorOperand &src)
-    {
-        assert(_arguments.has_tensor_argument(src.value()));
-        return _arguments[src.value()];
-    }
-
-private:
-    GpuTensorArgumentRegistry &_arguments;
-};
-
-/**
- * @brief The GpuKernel will be used in three occasions (stages):
- * #- Compilation stage
- * #- Tuning stage
- * #- Dispatch stage
- */
-struct GpuKernel
-{
-    // Compilation stage
-    std::string                code{};            // Source code, required for the compilation stage
-    std::vector<GpuExtensions> list_extensions{}; // Extensions, required for the compilation stage
-    // Tuning stage
-    std::string      config_id{}; // Unique id, required for the tuning stage
-    std::vector<LWS> list_lws{};  // LWS to test, required for the tuning stage
-    // Dispatch stage
-    GpuOutputSampler output_sampler{}; // GpuOutputSampler, required for the dispatch stage
-    std::vector<std::pair<int32_t, GpuTensorStorage>>
-        list_tensor_storages; // List of tensor storages, required for the dispatch stage
-    std::vector<std::pair<int32_t, TensorComponentType>>
-        list_tensor_components; // List of tensor components (width, stride,..), required for the dispatch stage)
-};
-
-// Generate all extension pragmas (hardcoded for now)
-inline std::string generate_extensions()
-{
-    std::string ext = R"(
-#if defined(cl_khr_fp16)
-#pragma OPENCL EXTENSION cl_khr_fp16 : enable
-#endif // defined(cl_khr_fp16)
-
-#if defined(cl_arm_integer_dot_product_int8)
-#pragma OPENCL EXTENSION cl_arm_integer_dot_product_int8 : enable
-#endif // defined(cl_arm_integer_dot_product_int8)
-
-#if defined(cl_arm_integer_dot_product_accumulate_int8)
-#pragma OPENCL EXTENSION cl_arm_integer_dot_product_accumulate_int8 : enable
-#endif // defined(cl_arm_integer_dot_product_accumulate_int8)
-
-#if defined(cl_arm_printf)
-#pragma OPENCL EXTENSION cl_arm_printf : enable
-#endif // defined(cl_arm_printf);
-)";
-    return ext;
-}
-
-// This function should produce an object with the source
-inline std::string generate_code(GpuKernelWriterDataHolder &in, const std::string &name)
-{
-    std::string code;
-    code += generate_extensions();
-    code += "__kernel void ";
-    code += name;
-    code += "(\n";
-
-    auto IdSpaces = in.arguments.IdSpace_declarations();
-
-    std::vector<std::string> arg_str;
-
-    auto tensor_args = in.arguments.tensor_argument_declarations();
-
-    for (auto &i : tensor_args)
-    {
-        // For each tensor used, get the storage and tensor components
-        auto storages   = i->storage_declarations();
-        auto components = i->component_declarations();
-
-        for (auto &y : storages)
-        {
-            std::string str;
-            str += i->storage_type_declaration(y);
-            str += " ";
-            str += i->storage(y);
-            arg_str.push_back(str);
-        }
-
-        for (auto &y : components)
-        {
-            std::string str;
-            str += i->component_type_declaration();
-            str += " ";
-            str += i->component(y);
-            arg_str.push_back(str);
-        }
-    }
-
-    for (size_t i = 0; i < arg_str.size(); ++i)
-    {
-        code += arg_str[i];
-        if (i + 1 < arg_str.size())
-        {
-            code += ",\n";
-        }
-    }
-
-    code += ")\n";
-    code += "{\n";
-    code += in.code;
-    code += "}\n";
-
-    return code;
-}
-
-/**
- * @brief This class is responsible to map a N-Tensor to a 3d tensor. The mapper needs the GpuSampler to know
- * how to reduce the dimensionality of a tensor
- *
- */
-class GpuTensor3dMapper
-{
-public:
-    GpuTensor3dMapper(IGpuTensorArgument *tensor, GpuSampler sampler) : _sampler(sampler), _tensor(tensor){};
-
-    std::string tensor_component_x() const
-    {
-        const auto format = _sampler.format;
-        switch (format)
-        {
-            case TensorSamplerFormat::C_WH_1:
-            case TensorSamplerFormat::C_W_H:
-                return _tensor->component(TensorComponentType::Dim0);
-            default:
-                std::cout << "Unsupported tensor format" << std::endl;
-                assert(false);
-                return "";
-        }
-    }
-
-    std::string tensor_component_y() const
-    {
-        const auto format = _sampler.format;
-        switch (format)
-        {
-            case TensorSamplerFormat::C_WH_1:
-                return _tensor->component(TensorComponentType::Dim1xDim2);
-            case TensorSamplerFormat::C_W_H:
-                return _tensor->component(TensorComponentType::Dim1);
-            default:
-                std::cout << "Unsupported tensor format" << std::endl;
-                assert(false);
-                return "";
-        }
-    }
-
-    std::string tensor_component_z() const
-    {
-        const auto format = _sampler.format;
-        switch (format)
-        {
-            case TensorSamplerFormat::C_WH_1:
-                return "1";
-            case TensorSamplerFormat::C_W_H:
-                return _tensor->component(TensorComponentType::Dim2);
-            default:
-                std::cout << "Unsupported tensor format" << std::endl;
-                assert(false);
-                return "";
-        }
-    }
-
-    std::string tensor_component_stride_y() const
-    {
-        const auto format = _sampler.format;
-        switch (format)
-        {
-            case TensorSamplerFormat::C_WH_1:
-            case TensorSamplerFormat::C_W_H:
-                return _tensor->component(TensorComponentType::Stride1);
-            default:
-                std::cout << "Unsupported tensor format" << std::endl;
-                assert(false);
-                return "";
-        }
-    }
-
-    std::string tensor_component_stride_z() const
-    {
-        const auto format = _sampler.format;
-        switch (format)
-        {
-            case TensorSamplerFormat::C_WH_1:
-                return "0";
-            case TensorSamplerFormat::C_W_H:
-                return _tensor->component(TensorComponentType::Stride2);
-            default:
-                std::cout << "Unsupported tensor format" << std::endl;
-                assert(false);
-                return "";
-        }
-    }
-
-    std::string tensor_component_stride_batch() const
-    {
-        const auto format = _sampler.format;
-        switch (format)
-        {
-            case TensorSamplerFormat::C_WH_1:
-            case TensorSamplerFormat::C_W_H:
-                return _tensor->component(TensorComponentType::Stride3);
-            default:
-                std::cout << "Unsupported tensor format" << std::endl;
-                assert(false);
-                return "";
-        }
-    }
-
-    bool is_one_component_x() const
-    {
-        auto       t      = _tensor->format();
-        const auto format = _sampler.format;
-        switch (format)
-        {
-            case TensorSamplerFormat::C_WH_1:
-            case TensorSamplerFormat::C_W_H:
-                return t.shape[0] == 1;
-            default:
-                std::cout << "Unsupported tensor format" << std::endl;
-                assert(false);
-                return "";
-        }
-    }
-
-    bool is_one_component_y() const
-    {
-        auto       t      = _tensor->format();
-        const auto format = _sampler.format;
-        switch (format)
-        {
-            case TensorSamplerFormat::C_WH_1:
-                return (t.shape[1] * t.shape[2]) == 1;
-            case TensorSamplerFormat::C_W_H:
-                return t.shape[1] == 1;
-            default:
-                std::cout << "Unsupported tensor format" << std::endl;
-                assert(false);
-                return "";
-        }
-    }
-
-    bool is_one_component_z() const
-    {
-        auto       t      = _tensor->format();
-        const auto format = _sampler.format;
-        switch (format)
-        {
-            case TensorSamplerFormat::C_WH_1:
-                return true;
-            case TensorSamplerFormat::C_W_H:
-                return t.shape[2] == 1;
-            default:
-                std::cout << "Unsupported tensor format" << std::endl;
-                assert(false);
-                return "";
-        }
-    }
-
-    bool is_one_component_batch() const
-    {
-        auto       t      = _tensor->format();
-        const auto format = _sampler.format;
-        switch (format)
-        {
-            case TensorSamplerFormat::C_WH_1:
-            case TensorSamplerFormat::C_W_H:
-                return t.shape[3] == 1;
-            default:
-                std::cout << "Unsupported tensor format" << std::endl;
-                assert(false);
-                return "";
-        }
-    }
-
-    GpuSampler gpu_sampler() const
-    {
-        return _sampler;
-    }
-
-    IGpuTensorArgument *tensor_argument() const
-    {
-        return _tensor;
-    }
-
-private:
-    GpuSampler          _sampler;
-    IGpuTensorArgument *_tensor;
-};
-
-struct GpuKernelWriterAttribute
-{
-    bool return_tensor_component_by_value{false};
-};
-
-enum class RoundingMode
-{
-    None,
-    Rte,
-    Rtz,
-    Rtp,
-    Rtn
-};
-
-// https://llvm.org/docs/tutorial/MyFirstLanguageFrontend/LangImpl05.html
-class IGpuKernelWriter
-{
-public:
-    virtual ~IGpuKernelWriter() = default;
-
-    virtual void set_IdSpace(int32_t id) = 0;
-
-    virtual void import_tile(const std::string &dst, const IVectorTile *src) = 0;
-
-    virtual void declare_argument(const std::string &name, const TensorInfo &tensor) = 0;
-
-    virtual void declare_tile(const std::string &name, const TileInfo &info) = 0;
-
-    virtual void
-    declare_const_tile(const std::string &name, const std::vector<std::vector<std::string>> &in, DataType dt) = 0;
-
-    virtual void write_text(const std::string &x) = 0;
-
-    virtual void compound_statement_begin() = 0;
-
-    virtual void compound_statement_end() = 0;
-
-    // Operations
-    virtual void op_get_global_id(const Operand &dst_var, int32_t dim) = 0;
-
-    virtual void
-    op_get_global_coord(const Operand &dst, const Operand &step, const TensorOperand &tensor, int32_t dim) = 0;
-
-    virtual void op_get_global_batch(const Operand &dst, const TensorOperand &tensor) = 0;
-
-    virtual void op_get_global_size(const Operand &dst_var, int32_t dim) = 0;
-
-    virtual void op_unary_expression(const Operand &dst, UnaryOp op, const Operand &src) = 0;
-
-    virtual void op_binary_expression(const Operand &dst, const Operand &lhs, BinaryOp op, const Operand &rhs) = 0;
-
-    virtual void op_assign(const Operand &dst_name, const Operand &src_name) = 0;
-
-    virtual void
-    op_unary_elementwise_function(const Operand &dst_name, UnaryFunction func, const Operand &src_name) = 0;
-
-    virtual void op_binary_elementwise_function(const Operand &dst_name,
-                                                BinaryFunction func,
-                                                const Operand &first_name,
-                                                const Operand &second_name) = 0;
-
-    virtual void op_ternary_elementwise_function(const Operand  &dst_name,
-                                                 TernaryFunction func,
-                                                 const Operand  &first_name,
-                                                 const Operand  &second_name,
-                                                 const Operand  &third_name) = 0;
-
-    virtual void op_if_header(const Operand &lhs, BinaryOp op, const Operand &rhs) = 0;
-
-    virtual void op_else_if_header(const Operand &lhs, BinaryOp op, const Operand &rhs) = 0;
-
-    virtual void op_else_header() = 0;
-
-    virtual void op_for_loop_header(const Operand &var_name,
-                                    BinaryOp       cond_op,
-                                    const Operand &cond_value,
-                                    const Operand &update_var,
-                                    AssignmentOp   update_op,
-                                    const Operand &update_value) = 0;
-
-    virtual void op_load_indirect(const TensorOperand &tensor,
-                                  const Operand       &dst,
-                                  const Operand       &x,
-                                  const Operand       &y_indirect,
-                                  const Operand       &z,
-                                  const Operand       &b = Operand("0", OperandType::ScalarInt32)) = 0;
-
-    virtual void op_load_immediate(const TensorOperand &tensor,
-                                   const Operand       &dst,
-                                   const Operand       &x,
-                                   const Operand       &y,
-                                   const Operand       &z,
-                                   const Operand       &b          = Operand("0", OperandType::ScalarInt32),
-                                   const Operand       &dilation_y = Operand("1", OperandType::ScalarInt32)) = 0;
-
-    virtual void op_store_immediate(const TensorOperand &tensor,
-                                    const Operand       &src,
-                                    const Operand       &x,
-                                    const Operand       &y,
-                                    const Operand       &z,
-                                    const Operand       &b = Operand("0", OperandType::ScalarInt32)) = 0;
-
-    virtual void op_cast_expression(const Operand &dst, const Operand &src, ConvertPolicy policy) = 0;
-
-    virtual void op_return() = 0;
-
-    // Utils
-    // It is the process of converting
-    virtual void util_get_indirect_buffer(const Operand       &dst,
-                                          const TensorOperand &tensor,
-                                          const Operand       &x,
-                                          const Operand       &y,
-                                          const Operand       &x_off,
-                                          const Operand       &y_off) = 0;
-};
-
-enum class GpuLoadStoreType
-{
-    Load  = 1,
-    Store = 2
-};
-
-class IGpuLoadStoreHelperWriter
-{
-public:
-    IGpuLoadStoreHelperWriter(IGpuKernelWriter *x, GpuTensor3dMapper mapper, GpuLoadStoreType type)
-        : _writer(x), _mapper(mapper), _type(type)
-    {
-    }
-
-    IGpuLoadStoreHelperWriter(const IGpuLoadStoreHelperWriter &) = default;
-
-    IGpuLoadStoreHelperWriter &operator=(const IGpuLoadStoreHelperWriter &) = default;
-
-    virtual ~IGpuLoadStoreHelperWriter() = default;
-
-    virtual void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) = 0;
-
-    virtual void write(const std::pair<int32_t, std::string> &y) = 0;
-
-    virtual void finalize() = 0;
-
-protected:
-    IGpuKernelWriter *_writer;
-    GpuTensor3dMapper _mapper;
-    GpuLoadStoreType  _type;
-};
-
-class ClLoadStoreBufferHelperWriter : public IGpuLoadStoreHelperWriter
-{
-public:
-    ClLoadStoreBufferHelperWriter(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type)
-        : IGpuLoadStoreHelperWriter(x, mapper, type)
-    {
-    }
-
-    ClLoadStoreBufferHelperWriter(const ClLoadStoreBufferHelperWriter &) = default;
-
-    ClLoadStoreBufferHelperWriter &operator=(const ClLoadStoreBufferHelperWriter &) = default;
-
-    static bool validate(IGpuKernelWriter *x, GpuTensor3dMapper mapper, GpuLoadStoreType type, IVectorTile *dst)
-    {
-        CKW_UNUSED(x, type, dst);
-
-        if (mapper.gpu_sampler().storage != GpuSamplerTensorStorage::BufferUint8Ptr)
-        {
-            return false;
-        }
-        return true;
-    }
-
-    void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) override
-    {
-        assert(validate(_writer, _mapper, _type, dst));
-
-        _dst           = dst;
-        _ls_width_full = dst->format().w;
-
-        _coord_x      = x->scalar(0, 0).str;
-        _coord_z      = z->scalar(0, 0).str;
-        _coord_b      = b->scalar(0, 0).str;
-        _coord_orig_z = _coord_z;
-
-        out_of_bound_initialize_x(_coord_x);
-        out_of_bound_initialize_z(_coord_z);
-
-        /*
-        meaning of else:
-        - x: partial load/store
-        - y: no load/store operation
-        - z: no load/store operation
-        if(x)
-        {
-            if(z)
-            {
-                if(y)
-                {
-                    // full load/store width
-                }
-                else
-                {
-                    // no load/store
-                }
-            }
-            else
-            {
-                // no load/store
-            }
-        }
-        else
-        {
-            if(z)
-            {
-                if(y)
-                {
-                    // partial load/store width
-                }
-                else
-                {
-                    // no load/store
-                }
-            }
-            else
-            {
-                // no load/store
-            }
-        }
-        */
-    }
-
-    void write(const std::pair<int32_t, std::string> &y) override
-    {
-        int32_t     idx_y   = y.first;
-        std::string coord_y = y.second;
-
-        // The only check required is on Y.
-        out_of_bound_initialize_y(coord_y);
-
-        const std::string dst     = _dst->vector(idx_y).str;
-        const std::string address = to_ls_buffer_address(_coord_x, coord_y, _coord_z, _coord_b);
-        const std::string ls_buf  = to_ls_buffer(_type, _ls_width_full, dst, address);
-
-        _writer->write_text(ls_buf);
-        _writer->write_text(";\n");
-
-        out_of_bound_finalize_y(dst);
-
-        // The left over load/store will be written in the finalize stage
-        if (_ls_width_part.size() != 0)
-        {
-            int32_t w = 0;
-            for (auto &p : _ls_width_part)
-            {
-                const std::string dst0    = _dst->vector(w, p, idx_y).str;
-                const std::string coord_x = _coord_x + " + " + std::to_string(w);
-                const std::string address = to_ls_buffer_address(coord_x, coord_y, _coord_z, _coord_b);
-                const std::string ls_buf0 = to_ls_buffer(_type, p, dst0, address);
-                _leftovers_x.push_back(std::make_pair(std::make_pair(dst0, coord_y), ls_buf0));
-
-                w += p;
-            }
-        }
-    }
-
-    void finalize() override
-    {
-        out_of_bound_finalize_z();
-        out_of_bound_finalize_x();
-    }
-
-private:
-    IVectorTile                                                             *_dst{nullptr};
-    int32_t                                                                  _ls_width_full{0};
-    std::vector<int32_t>                                                     _ls_width_part{};
-    std::vector<std::pair<std::pair<std::string, std::string>, std::string>> _leftovers_x{};
-    std::string                                                              _coord_x{};
-    std::string                                                              _coord_z{};
-    std::string                                                              _coord_orig_z{};
-    std::string                                                              _coord_b{};
-
-    void out_of_bound_initialize_x(std::string &coord)
-    {
-        if (_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin)
-        {
-            auto tensor_format = _mapper.tensor_argument()->format();
-            auto shape         = tensor_format.shape;
-
-            _ls_width_part = decompose_leftover_ls_vector_width(shape[0] % _ls_width_full);
-            if (_ls_width_part.size() != 0)
-            {
-                _writer->write_text("if(" + coord + " > 0)\n");
-                _writer->compound_statement_begin();
-            }
-        }
-    };
-
-    void out_of_bound_finalize_x()
-    {
-        if (_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin)
-        {
-            if (_ls_width_part.size() != 0)
-            {
-                _writer->compound_statement_end();
-                _writer->write_text("else\n");
-                _writer->compound_statement_begin();
-
-                out_of_bound_initialize_z(_coord_orig_z);
-                for (auto &i : _leftovers_x)
-                {
-                    out_of_bound_initialize_y(i.first.second);
-                    _writer->write_text(i.second);
-                    _writer->write_text(";\n");
-                    out_of_bound_finalize_y(i.first.first);
-                }
-                out_of_bound_finalize_z();
-                _writer->compound_statement_end();
-            }
-        }
-    };
-
-    void out_of_bound_initialize_y(std::string &coord)
-    {
-        std::string max = "";
-
-        const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
-
-        switch (address_mode_y)
-        {
-            case TensorSamplerAddressModeY::Skip:
-            case TensorSamplerAddressModeY::ClampToBorder:
-                // NOTE: This line should not be moved outside of the switch statement.
-                // The reason for that is because when we query the component, the component is marked as used
-                // and added to the list of arguments of the kernel. Since, not in all cases this component is required,
-                // we should request the component only when used
-                max = _mapper.tensor_component_y();
-                _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n");
-                _writer->compound_statement_begin();
-                break;
-            case TensorSamplerAddressModeY::SkipMinEdgeOnly:
-            case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
-                _writer->write_text("if(" + coord + " >= 0)\n");
-                _writer->compound_statement_begin();
-                break;
-            case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
-            case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
-                max = _mapper.tensor_component_y();
-                _writer->write_text("if(" + coord + " < " + max + ")\n");
-                _writer->compound_statement_begin();
-                break;
-            case TensorSamplerAddressModeY::ClampToNearest:
-                max   = _mapper.tensor_component_y();
-                coord = "clamp(" + coord + ", 0, " + max + " - 1)";
-                break;
-            case TensorSamplerAddressModeY::ClampToMaxEdgeOnly:
-                max   = _mapper.tensor_component_y();
-                coord = "min(" + coord + ", " + max + " - 1)";
-                break;
-            case TensorSamplerAddressModeY::ClampToMinEdgeOnly:
-                coord = "max(" + coord + ", 0)";
-                break;
-            case TensorSamplerAddressModeY::None:
-                break;
-            default:
-                std::cout << "Unsupported address mode for write_out_of_bound_check_yz" << std::endl;
-                assert(false);
-        }
-    };
-
-    void out_of_bound_finalize_y(const std::string &dst)
-    {
-        const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
-
-        switch (address_mode_y)
-        {
-            case TensorSamplerAddressModeY::ClampToBorder:
-            case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
-            case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
-            case TensorSamplerAddressModeY::Skip:
-            case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
-            case TensorSamplerAddressModeY::SkipMinEdgeOnly:
-                _writer->compound_statement_end();
-                break;
-            case TensorSamplerAddressModeY::None:
-                break;
-
-            default:
-                assert(false);
-        }
-
-        switch (address_mode_y)
-        {
-            case TensorSamplerAddressModeY::ClampToBorder:
-            case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
-            case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
-                _writer->write_text("else\n");
-                _writer->compound_statement_begin();
-                _writer->write_text(dst);
-                _writer->write_text(" = 0.0f;\n");
-                _writer->compound_statement_end();
-                break;
-            case TensorSamplerAddressModeY::None:
-                break;
-
-            default:
-                assert(false);
-        }
-    };
-
-    void out_of_bound_initialize_z(std::string &coord)
-    {
-        std::string max = "";
-
-        const auto address_mode_z = _mapper.gpu_sampler().address_mode_z;
-
-        switch (address_mode_z)
-        {
-            case TensorSamplerAddressModeZ::Skip:
-                max = _mapper.tensor_component_z();
-                _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n");
-                _writer->compound_statement_begin();
-                break;
-            case TensorSamplerAddressModeZ::SkipMinEdgeOnly:
-                _writer->write_text("if(" + coord + " >= 0)\n");
-                _writer->compound_statement_begin();
-                break;
-            case TensorSamplerAddressModeZ::SkipMaxEdgeOnly:
-                max = _mapper.tensor_component_z();
-                _writer->write_text("if(" + coord + " < " + max + ")\n");
-                _writer->compound_statement_begin();
-                break;
-            case TensorSamplerAddressModeZ::ClampToNearest:
-                max   = _mapper.tensor_component_z();
-                coord = "clamp(" + coord + ", 0, " + max + " - 1)";
-                break;
-            case TensorSamplerAddressModeZ::ClampToMaxEdgeOnly:
-                max   = _mapper.tensor_component_z();
-                coord = "min(" + coord + ", " + max + " - 1)";
-                break;
-            case TensorSamplerAddressModeZ::ClampToMinEdgeOnly:
-                coord = "max(" + coord + ", 0)";
-                break;
-            case TensorSamplerAddressModeZ::None:
-                break;
-            default:
-                std::cout << "Unsupported address mode for write_out_of_bound_check_yz" << std::endl;
-                assert(false);
-        }
-    };
-
-    void out_of_bound_finalize_z()
-    {
-        const auto address_mode_z = _mapper.gpu_sampler().address_mode_z;
-
-        switch (address_mode_z)
-        {
-            case TensorSamplerAddressModeZ::Skip:
-            case TensorSamplerAddressModeZ::SkipMinEdgeOnly:
-            case TensorSamplerAddressModeZ::SkipMaxEdgeOnly:
-                _writer->compound_statement_end();
-                break;
-            case TensorSamplerAddressModeZ::None:
-                break;
-
-            default:
-                assert(false);
-        }
-    };
-
-    std::vector<int32_t> decompose_leftover_ls_vector_width(int32_t ls_leftover_vector_width) const
-    {
-        std::vector<int32_t> x;
-
-        switch (ls_leftover_vector_width)
-        {
-            case 0:
-                break;
-            case 1:
-            case 2:
-            case 3:
-            case 4:
-            case 8:
-            case 16:
-                x.push_back(ls_leftover_vector_width);
-                break;
-            case 5:
-                x.push_back(4);
-                x.push_back(1);
-                break;
-            case 6:
-                x.push_back(4);
-                x.push_back(2);
-                break;
-            case 7:
-                x.push_back(4);
-                x.push_back(3);
-                break;
-            case 9:
-                x.push_back(8);
-                x.push_back(1);
-                break;
-            case 10:
-                x.push_back(8);
-                x.push_back(2);
-                break;
-            case 11:
-                x.push_back(8);
-                x.push_back(3);
-                break;
-            case 12:
-                x.push_back(8);
-                x.push_back(4);
-                break;
-            case 13:
-                x.push_back(8);
-                x.push_back(4);
-                x.push_back(1);
-                break;
-            case 14:
-                x.push_back(8);
-                x.push_back(4);
-                x.push_back(2);
-                break;
-            case 15:
-                x.push_back(8);
-                x.push_back(4);
-                x.push_back(3);
-                break;
-
-            default:
-                assert(false);
-        }
-        return x;
-    }
-
-    std::string
-    to_ls_buffer(GpuLoadStoreType type, int32_t vector_width, const std::string &data, const std::string &address)
-    {
-        switch (type)
-        {
-            case GpuLoadStoreType::Load:
-                if (vector_width != 1)
-                {
-                    return data + " = vload" + std::to_string(vector_width) + "(0, " + address + ")";
-                }
-                else
-                {
-                    return data + " = *(" + address + ")";
-                }
-                break;
-            case GpuLoadStoreType::Store:
-                if (vector_width != 1)
-                {
-                    return "vstore" + std::to_string(vector_width) + "(" + data + ", 0, " + address + ")";
-                }
-                else
-                {
-                    return "*(" + address + ") = " + data;
-                }
-                break;
-            default:
-                std::cout << "Unsupported GpuLoadStoreType" << std::endl;
-                assert(false);
-                return "";
-        }
-    }
-
-    std::string
-    to_ls_buffer_address(const std::string &x, const std::string &y, const std::string &z, const std::string &b) const
-    {
-        auto tensor_storage = static_cast<GpuTensorStorage>(_mapper.gpu_sampler().storage);
-        assert(tensor_storage == GpuTensorStorage::BufferUint8Ptr);
-        const std::string ptr_buf  = _mapper.tensor_argument()->storage(tensor_storage);
-        const std::string dst_type = get_cl_data_type(_dst->format().dt, 1);
-
-        std::string address;
-        address += "(__global ";
-        address += dst_type;
-        address += "*)(";
-        address += ptr_buf;
-        if (x != "0" && (_mapper.is_one_component_x() != true))
-        {
-            address += " + (";
-            address += x + ") * sizeof(" + dst_type + ")";
-        }
-        if (y != "0")
-        {
-            const std::string stride_y = _mapper.tensor_component_stride_y();
-            address += " + (";
-            address += y + ")";
-            address += " * ";
-            address += stride_y;
-        }
-        if (z != "0")
-        {
-            const std::string stride_z = _mapper.tensor_component_stride_z();
-            address += " + (";
-            address += z + ")";
-            address += " * ";
-            address += stride_z;
-        }
-        if (b != "0" && (_mapper.is_one_component_batch() != true))
-        {
-            const std::string stride_b = _mapper.tensor_component_stride_batch();
-            address += " + (";
-            address += b + ")";
-            address += " * ";
-            address += stride_b;
-        }
-        address += ")";
-        return address;
-    }
-};
-
-class ClLoadStoreImage2dHelperWriter : public IGpuLoadStoreHelperWriter
-{
-public:
-    static bool validate(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type, IVectorTile *dst)
-    {
-        CKW_UNUSED(x);
-
-        if (dst->format().w != 4)
-        {
-            return false;
-        }
-        if (mapper.gpu_sampler().address_mode_x != TensorSamplerAddressModeX::None)
-        {
-            return false;
-        }
-        if (mapper.gpu_sampler().address_mode_z != TensorSamplerAddressModeZ::None)
-        {
-            return false;
-        }
-        if (mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dReadOnly && type == GpuLoadStoreType::Load)
-        {
-            return false;
-        }
-        if (mapper.gpu_sampler().storage != GpuSamplerTensorStorage::Image2dWriteOnly &&
-            type == GpuLoadStoreType::Store)
-        {
-            return false;
-        }
-        if ((dst->format().dt != DataType::Fp32) && (dst->format().dt != DataType::Fp16))
-        {
-            return false;
-        }
-        return true;
-        /*
-        - x: Only GpuSamplerAddressModeX::None is supported and vector length = 4
-        - z: Only GpuSamplerAddressModeZ::None is supported
-        */
-    }
-
-    ClLoadStoreImage2dHelperWriter(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type)
-        : IGpuLoadStoreHelperWriter(x, mapper, type)
-    {
-    }
-
-    ClLoadStoreImage2dHelperWriter(const ClLoadStoreImage2dHelperWriter &) = default;
-
-    ClLoadStoreImage2dHelperWriter &operator=(const ClLoadStoreImage2dHelperWriter &) = default;
-
-    void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) override
-    {
-        assert(validate(_writer, _mapper, _type, dst));
-
-        _dst           = dst;
-        _ls_width_full = dst->format().w;
-        _coord_x       = x->scalar(0, 0).str;
-        _coord_z       = z->scalar(0, 0).str;
-        _coord_b       = b->scalar(0, 0).str;
-
-        /*
-        if(y)
-        {
-            // full load/store width
-        }
-        else
-        {
-            // no load/store
-        }
-        */
-    }
-
-    void write(const std::pair<int32_t, std::string> &y) override
-    {
-        int32_t     idx_y   = y.first;
-        std::string coord_y = y.second;
-
-        // The only check required is on Y.
-        out_of_bound_initialize_y(coord_y);
-
-        const std::string dst     = _dst->vector(idx_y).str;
-        const std::string sampler = to_ls_image2d_sampler();
-        const std::string coord   = to_ls_image2d_coord(_coord_x, coord_y, _coord_z, _coord_b);
-        const std::string ls_buf  = to_ls_image2d(_type, _ls_width_full, dst, sampler, coord);
-
-        _writer->write_text(ls_buf);
-        _writer->write_text(";\n");
-
-        out_of_bound_finalize_y(dst);
-    }
-
-    void finalize() override
-    {
-    }
-
-private:
-    IVectorTile *_dst{nullptr};
-    int32_t      _ls_width_full{0};
-    std::string  _coord_x{};
-    std::string  _coord_z{};
-    std::string  _coord_b{};
-
-    void out_of_bound_initialize_y(std::string &coord)
-    {
-        std::string max = "";
-
-        const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
-
-        switch (address_mode_y)
-        {
-            case TensorSamplerAddressModeY::Skip:
-                max = _mapper.tensor_component_y();
-                _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n");
-                _writer->compound_statement_begin();
-                break;
-            case TensorSamplerAddressModeY::SkipMinEdgeOnly:
-                _writer->write_text("if(" + coord + " >= 0)\n");
-                _writer->compound_statement_begin();
-                break;
-            case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
-                max = _mapper.tensor_component_y();
-                _writer->write_text("if(" + coord + " < " + max + ")\n");
-                _writer->compound_statement_begin();
-                break;
-            case TensorSamplerAddressModeY::ClampToBorder:
-            case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
-            case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
-            case TensorSamplerAddressModeY::ClampToNearest:
-            case TensorSamplerAddressModeY::ClampToMaxEdgeOnly:
-            case TensorSamplerAddressModeY::ClampToMinEdgeOnly:
-            case TensorSamplerAddressModeY::None:
-                break;
-            default:
-                std::cout << "Unsupported address mode for write_out_of_bound_check_y" << std::endl;
-                assert(false);
-        }
-    };
-
-    void out_of_bound_finalize_y(const std::string &dst)
-    {
-        CKW_UNUSED(dst);
-
-        const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
-
-        switch (address_mode_y)
-        {
-            case TensorSamplerAddressModeY::Skip:
-            case TensorSamplerAddressModeY::SkipMinEdgeOnly:
-            case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
-                _writer->compound_statement_end();
-                break;
-
-            default:
-                assert(false);
-        }
-    };
-
-    std::string to_ls_image2d(GpuLoadStoreType   type,
-                              int32_t            vector_width,
-                              const std::string &data,
-                              const std::string &sampler,
-                              const std::string &coord)
-    {
-        CKW_UNUSED(vector_width);
-
-        auto              tensor_storage = static_cast<GpuTensorStorage>(_mapper.gpu_sampler().storage);
-        const std::string image2d_obj    = _mapper.tensor_argument()->storage(tensor_storage);
-        const std::string post_fix       = _dst->format().dt == DataType::Fp32 ? "f" : "h";
-
-        switch (type)
-        {
-            case GpuLoadStoreType::Load:
-                return data + " = read_image" + post_fix + "(" + image2d_obj + ", " + sampler + ", " + coord + ")";
-                break;
-            case GpuLoadStoreType::Store:
-                return "write_image" + post_fix + "(" + image2d_obj + ", " + coord + ", " + data + ")";
-            default:
-                assert(false);
-                std::cout << "Unsupported GpuLoadStoreType" << std::endl;
-                assert(false);
-                return "";
-        }
-    }
-
-    std::string to_ls_image2d_sampler() const
-    {
-        const auto address_mode_y = _mapper.gpu_sampler().address_mode_y;
-
-        switch (address_mode_y)
-        {
-            case TensorSamplerAddressModeY::None:
-                return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST";
-            case TensorSamplerAddressModeY::Skip:
-            case TensorSamplerAddressModeY::SkipMinEdgeOnly:
-            case TensorSamplerAddressModeY::SkipMaxEdgeOnly:
-            case TensorSamplerAddressModeY::ClampToBorder:
-            case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly:
-            case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly:
-                return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST";
-            case TensorSamplerAddressModeY::ClampToNearest:
-            case TensorSamplerAddressModeY::ClampToMaxEdgeOnly:
-            case TensorSamplerAddressModeY::ClampToMinEdgeOnly:
-                return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST";
-            default:
-                std::cout << "Unsupported address_mode_coord" << std::endl;
-                assert(false);
-                return "";
-        }
-    }
-
-    std::string
-    to_ls_image2d_coord(const std::string &x, const std::string &y, const std::string &z, const std::string &b) const
-    {
-        std::string coord_x = "(" + x + ") >> 2";
-        std::string coord_y = "(";
-
-        if (y != "0")
-        {
-            coord_y += y;
-        }
-        if (z != "0" && (_mapper.is_one_component_z() != true))
-        {
-            const std::string dim = _mapper.tensor_component_y();
-            coord_y += " + (";
-            coord_y += z + ")";
-            coord_y += " * ";
-            coord_y += dim;
-        }
-        if (b != "0" && (_mapper.is_one_component_batch() != true))
-        {
-            const std::string dim0 = _mapper.tensor_component_y();
-            const std::string dim1 = _mapper.tensor_component_z();
-            coord_y += " + (";
-            coord_y += b + ")";
-            coord_y += " * ";
-            coord_y += dim0;
-            coord_y += " * ";
-            coord_y += dim1;
-        }
-        coord_y += ")";
-        return "(int2)(" + coord_x + ", " + coord_y + ")";
-    }
-};
-
-/** IGpuLoadStoreHelperWriter factory class */
-class ClLoadStoreHelperWriterFactory final
-{
-public:
-    /** Static method to call the IGpuLoadStoreHelperWriter class accordingly with the tensor storage set in the mapper
-     *
-     *
-     * @return IGpuLoadStoreHelperWriter
-     */
-    static std::unique_ptr<IGpuLoadStoreHelperWriter>
-    create(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type)
-    {
-        const auto tensor_storage = mapper.gpu_sampler().storage;
-        switch (tensor_storage)
-        {
-            case GpuSamplerTensorStorage::BufferUint8Ptr:
-                return std::make_unique<ClLoadStoreBufferHelperWriter>(x, mapper, type);
-            case GpuSamplerTensorStorage::Image2dReadOnly:
-            case GpuSamplerTensorStorage::Image2dWriteOnly:
-                return std::make_unique<ClLoadStoreImage2dHelperWriter>(x, mapper, type);
-            default:
-                std::cout << "Unsupported Gpu tensor storage" << std::endl;
-                assert(false);
-                return nullptr;
-        }
-    }
-};
-
-// This utility method needs to go in utils.h
-inline bool is_tile_scalar(const IVectorTile *x)
-{
-    return x->format().w == 1 && x->format().h == 1;
-}
-
-class ClKernelWriter : public IGpuKernelWriter
-{
-public:
-    ClKernelWriter(GpuKernelWriterAttribute *attr, GpuKernelWriterDataHolder *x)
-    {
-        _data = x;
-        _attr = attr;
-    }
-
-    ClKernelWriter(const ClKernelWriter &) = default;
-
-    ClKernelWriter &operator=(const ClKernelWriter &) = default;
-
-    // A IdSpaced ID is a term used to describe a fragment that is registered in ICode to ensure
-    // there are no conflicts or ambiguity in the code
-    void set_IdSpace(int32_t id) override
-    {
-        _data->tiles.set_IdSpace(id);
-        _data->arguments.set_IdSpace(id);
-    }
-
-    void import_tile(const std::string &dst_name, const IVectorTile *src) override
-    {
-        _data->tiles.insert(dst_name, src);
-    }
-
-    void declare_argument(const std::string &name, const TensorInfo &tensor) override
-    {
-        assert(_data->arguments[name] == nullptr);
-        _data->arguments.insert(name, tensor, _attr->return_tensor_component_by_value);
-    }
-
-    void declare_tile(const std::string &name, const TileInfo &format) override
-    {
-        assert(_data->tiles[name] == nullptr);
-        _data->tiles.insert(name, format);
-
-        IVectorTile *x = _data->tiles[name];
-
-        for (auto &t : x->underlying_source_variables())
-        {
-            _data->code += t.type.str + " " + t.str + ";\n";
-        }
-    }
-
-    void
-    declare_const_tile(const std::string &name, const std::vector<std::vector<std::string>> &in, DataType dt) override
-    {
-        assert(_data->tiles[name] == nullptr);
-        _data->tiles.insert(name, in, dt);
-        // Note: A constant does not need to be declared in the code
-    }
-
-    void write_text(const std::string &x) override
-    {
-        _data->code += x;
-    }
-
-    void compound_statement_begin() override
-    {
-        _data->tiles.increment_registry_level();
-        _data->code += "{\n";
-    }
-
-    void compound_statement_end() override
-    {
-        _data->tiles.decrement_registry_level();
-        _data->code += "}\n";
-    }
-
-    void op_get_global_id(const Operand &dst_var, int32_t dim) override
-    {
-        assert(dst_var.type() == OperandType::Tile);
-        assert(_data->tiles.has_tile(dst_var.value()));
-        assert(_data->tiles[dst_var.value()]->format().w == 1 &&
-               _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable
-
-        auto var = _data->tiles[dst_var.value()];
-
-        _data->code += var->scalar(0, 0).str;
-        _data->code += " = get_global_id(";
-        _data->code += std::to_string(dim);
-        _data->code += ");\n";
-    };
-
-    void op_get_global_coord(const Operand       &o_dst,
-                             const Operand       &o_step,
-                             const TensorOperand &o_tensor,
-                             int32_t              dim) override
-    {
-        OperandUnpacker operands(_data->tiles, _data->arguments);
-        auto            dst  = operands.unpack(o_dst);
-        auto            step = operands.unpack(o_step);
-
-        // Validation: Check that x, y and z are scalar
-
-        TensorOperandUnpacker tensor_operands(_data->arguments);
-        auto                  tensor      = tensor_operands.unpack(o_tensor);
-        auto                  gpu_sampler = o_tensor.sampler();
-
-        GpuTensor3dMapper mapper(tensor, gpu_sampler);
-
-        switch (dim)
-        {
-            case 0:
-                if (mapper.is_one_component_x())
-                {
-                    _data->code += dst->scalar(0, 0).str;
-                    _data->code += " = 0;\n";
-                }
-                else
-                {
-                    if (mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin)
-                    {
-                        // Validation: Check: fixed tensor shape
-                        // TO BE CHANGED
-                        _data->code += dst->scalar(0, 0).str;
-                        _data->code += " = get_global_id(0) * ";
-                        _data->code += step->scalar(0, 0).str;
-                        _data->code += ";\n";
-                    }
-                    else
-                    {
-                        _data->code += dst->scalar(0, 0).str;
-                        _data->code += " = get_global_id(0) * ";
-                        _data->code += step->scalar(0, 0).str;
-                        _data->code += ";\n";
-                    }
-                }
-                break;
-            case 1:
-                if (mapper.is_one_component_y())
-                {
-                    _data->code += dst->scalar(0, 0).str;
-                    _data->code += " = 0;\n";
-                }
-                else
-                {
-                    if (mapper.gpu_sampler().address_mode_y == TensorSamplerAddressModeY::OverlappingMin)
-                    {
-                    }
-                    else
-                    {
-                        _data->code += dst->scalar(0, 0).str;
-                        _data->code += " = get_global_id(1) * ";
-                        _data->code += step->scalar(0, 0).str;
-                        _data->code += ";\n";
-                    }
-                }
-                break;
-            case 2:
-                if (mapper.is_one_component_z())
-                {
-                    _data->code += dst->scalar(0, 0).str;
-                    _data->code += " = 0;\n";
-                }
-                else
-                {
-                    _data->code += dst->scalar(0, 0).str;
-                    _data->code += " = get_global_id(2) * ";
-                    _data->code += step->scalar(0, 0).str;
-                    _data->code += ";\n";
-                }
-                break;
-            default:
-                break;
-        }
-    };
-
-    void op_get_global_batch(const Operand &o_dst, const TensorOperand &o_tensor) override
-    {
-        OperandUnpacker    operands(_data->tiles, _data->arguments);
-        const IVectorTile *dst = operands.unpack(o_dst);
-
-        TensorOperandUnpacker tensor_operands(_data->arguments);
-        IGpuTensorArgument   *tensor      = tensor_operands.unpack(o_tensor);
-        auto                  gpu_sampler = o_tensor.sampler();
-
-        GpuTensor3dMapper mapper(tensor, gpu_sampler);
-
-        if (mapper.is_one_component_batch())
-        {
-            _data->code += dst->scalar(0, 0).str;
-            _data->code += " = 0;\n";
-        }
-        else
-        {
-            std::cout << "Unsupported batched computation" << std::endl;
-            assert(false);
-        }
-    };
-
-    void op_get_global_size(const Operand &dst_var, int32_t dim) override
-    {
-        assert(dst_var.type() == OperandType::Tile);
-        assert(_data->tiles.has_tile(dst_var.value()));
-        assert(_data->tiles[dst_var.value()]->format().w == 1 &&
-               _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable
-
-        auto var = _data->tiles[dst_var.value()];
-
-        _data->code += var->scalar(0, 0).str;
-        _data->code += " = get_global_size(";
-        _data->code += std::to_string(dim);
-        _data->code += ");\n";
-    }
-
-    void op_unary_expression(const Operand &dst_name, UnaryOp op, const Operand &src_name) override
-    {
-        OperandUnpacker    operands(_data->tiles, _data->arguments);
-        const IVectorTile *src = operands.unpack(src_name);
-        const IVectorTile *dst = operands.unpack(dst_name);
-
-        const int32_t     dst_w = dst->format().w;
-        const int32_t     dst_h = dst->format().h;
-        const int32_t     src_w = src->format().w;
-        const std::string dt    = dst->underlying_source_variables()[0].type.str;
-
-        const bool broadcast_src_x = dst_w != 1 && src_w == 1;
-
-        const std::string src_prefix = broadcast_src_x ? "(" + dt + ")" : "";
-
-        // Broadcasting on Y is automatic
-        for (int32_t y = 0; y < dst_h; ++y)
-        {
-            _data->code += dst->vector(y).str;
-            _data->code += " = ";
-            _data->code += to_string(op);
-            _data->code += src_prefix + src->vector(y).str;
-            _data->code += ";\n";
-        }
-    }
-
-    void op_binary_expression(const Operand &dst_name,
-                              const Operand &lhs_name,
-                              BinaryOp       op,
-                              const Operand &rhs_name) override
-    {
-        OperandUnpacker    operands(_data->tiles, _data->arguments);
-        const IVectorTile *lhs = operands.unpack(lhs_name);
-        const IVectorTile *rhs = operands.unpack(rhs_name);
-        const IVectorTile *dst = operands.unpack(dst_name);
-
-        const int32_t dst_w = dst->format().w;
-        const int32_t dst_h = dst->format().h;
-        assert(lhs != nullptr);
-        const int32_t lhs_w = lhs->format().w;
-        const int32_t rhs_w = rhs->format().w;
-
-        if (op == BinaryOp::MatMul_Nt_T)
-        {
-            assert((dst->format().dt == DataType::Fp32) || (dst->format().dt == DataType::Fp16));
-            for (int32_t y = 0; y < dst_h; ++y)
-            {
-                for (int32_t x = 0; x < dst_w; ++x)
-                {
-                    for (int32_t k = 0; k < lhs_w; ++k)
-                    {
-                        _data->code += dst->scalar(x, y).str;
-                        _data->code += " = fma(";
-                        _data->code += lhs->scalar(k, y).str;
-                        _data->code += ", ";
-                        _data->code += rhs->scalar(k, x).str;
-                        _data->code += ", ";
-                        _data->code += dst->scalar(x, y).str;
-                        _data->code += ");\n";
-                    }
-                }
-            }
-
-            return;
-        }
-
-        const bool broadcast_lhs_x = dst_w != 1 && lhs_w == 1;
-        const bool broadcast_rhs_x = dst_w != 1 && rhs_w == 1;
-
-        const std::string lhs_prefix =
-            broadcast_lhs_x ? "(" + dst->underlying_source_variables()[0].type.str + ")" : "";
-        const std::string rhs_prefix =
-            broadcast_rhs_x ? "(" + dst->underlying_source_variables()[0].type.str + ")" : "";
-        const std::string op_str = to_string(op);
-
-        // Broadcasting on Y is automatic
-        for (int32_t y = 0; y < dst_h; ++y)
-        {
-            _data->code += dst->vector(y).str;
-            _data->code += " = ";
-            _data->code += lhs_prefix + lhs->vector(y).str;
-            _data->code += " ";
-            _data->code += op_str;
-            _data->code += " ";
-            _data->code += rhs_prefix + rhs->vector(y).str;
-            _data->code += ";\n";
-        }
-    };
-
-    void op_cast_expression(const Operand &o_dst, const Operand &o_src, ConvertPolicy policy) override
-    {
-        OperandUnpacker    operands(_data->tiles, _data->arguments);
-        const IVectorTile *src = operands.unpack(o_src);
-        const IVectorTile *dst = operands.unpack(o_dst);
-        // const int32_t dst_w  = dst->format().w;
-        const int32_t     dst_h    = dst->format().h;
-        const std::string dt       = dst->underlying_source_variables()[0].type.str;
-        const bool        is_float = (dst->format().dt == DataType::Fp32) || (dst->format().dt == DataType::Fp16);
-        const std::string sat      = ((policy == ConvertPolicy::Saturate && !is_float) ? "_sat" : "");
-
-        // Broadcasting on Y is automatic
-        for (int32_t y = 0; y < dst_h; ++y)
-        {
-            _data->code += dst->vector(y).str;
-            _data->code += " = convert_" + dt + sat + "(";
-            _data->code += src->vector(y).str;
-            _data->code += ");\n";
-        }
-    };
-
-    void op_assign(const Operand &dst_name, const Operand &src_name) override
-    {
-        OperandUnpacker    operands(_data->tiles, _data->arguments);
-        const IVectorTile *src = operands.unpack(src_name);
-        const IVectorTile *dst = operands.unpack(dst_name);
-
-        const int32_t     dst_w = dst->format().w;
-        const int32_t     dst_h = dst->format().h;
-        const int32_t     src_w = src->format().w;
-        const std::string dt    = dst->underlying_source_variables()[0].type.str;
-
-        const bool broadcast_src_x = dst_w != 1 && src_w == 1;
-
-        const std::string src_prefix = broadcast_src_x ? "(" + dt + ")" : "";
-
-        // Broadcasting on Y is automatic
-        for (int32_t y = 0; y < dst_h; ++y)
-        {
-            _data->code += dst->vector(y).str;
-            _data->code += " = ";
-            _data->code += src_prefix + src->vector(y).str;
-            _data->code += ";\n";
-        }
-    }
-
-    void op_unary_elementwise_function(const Operand &dst_name, UnaryFunction func, const Operand &src_name) override
-    {
-        OperandUnpacker    operands(_data->tiles, _data->arguments);
-        const IVectorTile *src = operands.unpack(src_name);
-        const IVectorTile *dst = operands.unpack(dst_name);
-
-        const int32_t     dst_h = dst->format().h;
-        const std::string dt    = dst->underlying_source_variables()[0].type.str;
-
-        // Always perform an explicit cast. This automatically covers at least the 2 scenarios:
-        // 1. Widen a scalar into a vector type. This enables scalar-vector broadcasting
-        // 2. Ensure non-ambiguity over function overloads.
-        //    E.g. a constant tile may be accidentally initialized with a double literal. By casting it to single float,
-        //    it avoids ambiguous function calls
-        const std::string src_prefix = "(" + dt + ")";
-
-        // Broadcasting on Y is automatic
-        for (int32_t y = 0; y < dst_h; ++y)
-        {
-            _data->code += dst->vector(y).str;
-            _data->code += " = ";
-
-            switch (func)
-            {
-                case UnaryFunction::Exp:
-                    _data->code += "exp(";
-                    break;
-                case UnaryFunction::Tanh:
-                    _data->code += "tanh(";
-                    break;
-                case UnaryFunction::Sqrt:
-                    _data->code += "sqrt(";
-                    break;
-                case UnaryFunction::Erf:
-                    _data->code += "erf(";
-                    break;
-                case UnaryFunction::Fabs:
-                    _data->code += "fabs(";
-                    break;
-                case UnaryFunction::Log:
-                    _data->code += "log(";
-                    break;
-                case UnaryFunction::SizeOf:
-                    _data->code += "sizeof(";
-                    break;
-                case UnaryFunction::Round:
-                    _data->code += "round(";
-                    break;
-                case UnaryFunction::Floor:
-                    _data->code += "floor(";
-                    break;
-                default:
-                    CKW_ASSERT_MSG(false, "Unexpected UnaryFunction used.");
-            }
-
-            _data->code += src_prefix + src->vector(y).str;
-            _data->code += ");\n";
-        }
-    }
-
-    void op_binary_elementwise_function(const Operand &dst_name,
-                                        BinaryFunction func,
-                                        const Operand &first_name,
-                                        const Operand &second_name) override
-    {
-        OperandUnpacker    operands(_data->tiles, _data->arguments);
-        const IVectorTile *first  = operands.unpack(first_name);
-        const IVectorTile *second = operands.unpack(second_name);
-        const IVectorTile *dst    = operands.unpack(dst_name);
-
-        const int32_t     dst_h        = dst->format().h;
-        const auto        datatype     = dst->underlying_source_variables()[0].type;
-        const std::string datatype_str = datatype.str;
-
-        // Always perform an explicit cast. See similar comments in op_unary_elementwise_function
-        const std::string first_prefix  = "(" + datatype_str + ")";
-        const std::string second_prefix = "(" + datatype_str + ")";
-
-        const bool is_float = (datatype.dt == DataType::Fp32 || datatype.dt == DataType::Fp16);
-
-        // Broadcasting on Y is automatic
-        for (int32_t y = 0; y < dst_h; ++y)
-        {
-            _data->code += dst->vector(y).str;
-            _data->code += " = ";
-
-            switch (func)
-            {
-                case BinaryFunction::Min:
-                    _data->code += is_float ? "fmin(" : "min(";
-                    break;
-                case BinaryFunction::Max:
-                    _data->code += is_float ? "fmax(" : "max(";
-                    break;
-                default:
-                    CKW_ASSERT_MSG(false, "Unexpected BinaryFunction used.");
-            }
-
-            _data->code += first_prefix + first->vector(y).str;
-            _data->code += ", ";
-            _data->code += second_prefix + second->vector(y).str;
-            _data->code += ");\n";
-        }
-    }
-
-    void op_ternary_elementwise_function(const Operand  &dst_name,
-                                         TernaryFunction func,
-                                         const Operand  &first_name,
-                                         const Operand  &second_name,
-                                         const Operand  &third_name) override
-    {
-        OperandUnpacker    operands(_data->tiles, _data->arguments);
-        const IVectorTile *first  = operands.unpack(first_name);
-        const IVectorTile *second = operands.unpack(second_name);
-        const IVectorTile *third  = operands.unpack(third_name);
-        const IVectorTile *dst    = operands.unpack(dst_name);
-
-        const int32_t     dst_h = dst->format().h;
-        const std::string dt    = dst->underlying_source_variables()[0].type.str;
-
-        // Always perform an explicit cast. See similar comments in op_unary_elementwise_function
-        const std::string first_prefix  = "(" + dt + ")";
-        const std::string second_prefix = "(" + dt + ")";
-        const std::string third_prefix  = "(" + dt + ")";
-
-        // Broadcasting on Y is automatic
-        for (int32_t y = 0; y < dst_h; ++y)
-        {
-            _data->code += dst->vector(y).str;
-            _data->code += " = ";
-
-            switch (func)
-            {
-                case TernaryFunction::Select:
-                    _data->code += "select(";
-                    break;
-                case TernaryFunction::Clamp:
-                    _data->code += "clamp(";
-                    break;
-                default:
-                    CKW_ASSERT_MSG(false, "Unexpected TernaryFunction used.");
-            }
-
-            _data->code += first_prefix + first->vector(y).str;
-            _data->code += ", ";
-            _data->code += second_prefix + second->vector(y).str;
-            _data->code += ", ";
-            _data->code += third_prefix + third->vector(y).str;
-            _data->code += ");\n";
-        }
-    }
-
-    void op_if_header(const Operand &o_lhs, BinaryOp op, const Operand &o_rhs) override
-    {
-        OperandUnpacker    operands(_data->tiles, _data->arguments);
-        const IVectorTile *lhs = operands.unpack(o_lhs);
-        const IVectorTile *rhs = operands.unpack(o_rhs);
-
-        assert(is_tile_scalar(lhs));
-        assert(is_tile_scalar(rhs));
-
-        _data->code += "if(";
-        _data->code += lhs->scalar(0, 0).str;
-        _data->code += " ";
-        _data->code += to_string(op);
-        _data->code += " ";
-        _data->code += rhs->scalar(0, 0).str;
-        _data->code += ")\n";
-    }
-
-    void op_else_if_header(const Operand &o_lhs, BinaryOp op, const Operand &o_rhs) override
-    {
-        _data->code += "else ";
-        op_if_header(o_lhs, op, o_rhs);
-    }
-
-    void op_else_header() override
-    {
-        _data->code += "else\n";
-    }
-
-    void op_for_loop_header(const Operand &var_name,
-                            BinaryOp       cond_op,
-                            const Operand &cond_value_name,
-                            const Operand &update_var_name,
-                            AssignmentOp   update_op,
-                            const Operand &update_value_name) override
-    {
-        OperandUnpacker    operands(_data->tiles, _data->arguments);
-        const IVectorTile *var          = operands.unpack(var_name);
-        const IVectorTile *cond_value   = operands.unpack(cond_value_name);
-        const IVectorTile *update_var   = operands.unpack(update_var_name);
-        const IVectorTile *update_value = operands.unpack(update_value_name);
-
-        const int32_t dst_w = var->format().w;
-        const int32_t dst_h = var->format().h;
-
-        // It must be a scalar variable
-        CKW_UNUSED(dst_w, dst_h);
-        assert(dst_w == 1);
-        assert(dst_h == 1);
-
-        _data->code += "for(; ";
-        _data->code += var->scalar(0, 0).str;
-        _data->code += " ";
-        _data->code += to_string(cond_op);
-        _data->code += " " + cond_value->scalar(0, 0).str + "; ";
-        _data->code += update_var->scalar(0, 0).str;
-        _data->code += " ";
-        _data->code += to_string(update_op);
-        _data->code += " " + update_value->scalar(0, 0).str + ")";
-        _data->code += "\n";
-    }
-
-    void op_load_immediate(const TensorOperand &o_tensor,
-                           const Operand       &o_dst,
-                           const Operand       &o_x,
-                           const Operand       &o_y,
-                           const Operand       &o_z,
-                           const Operand       &o_batch_idx,
-                           const Operand       &dilation_y) override
-    {
-        OperandUnpacker operands(_data->tiles, _data->arguments);
-
-        // Not const as it requires changes to 'load_writer'.
-        IVectorTile *dst   = operands.unpack(o_dst);
-        IVectorTile *x     = operands.unpack(o_x);
-        IVectorTile *y     = operands.unpack(o_y);
-        IVectorTile *z     = operands.unpack(o_z);
-        IVectorTile *dil_y = operands.unpack(dilation_y);
-        IVectorTile *b     = operands.unpack(o_batch_idx);
-
-        TensorOperandUnpacker tensor_operands(_data->arguments);
-        IGpuTensorArgument   *tensor      = tensor_operands.unpack(o_tensor);
-        auto                  gpu_sampler = o_tensor.sampler();
-
-        GpuTensor3dMapper mapper(tensor, gpu_sampler);
-
-        auto load_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Load);
-
-        // Initialize the constant part
-        load_writer->initialize(dst, x, z, b);
-
-        for (int i = 0; i < dst->format().h; ++i)
-        {
-            std::string coord_y = y->scalar(0, 0).str + " + " + std::to_string(i);
-            if (dil_y->scalar(0, 0).str != "1")
-            {
-                coord_y += " * " + dil_y->scalar(0, 0).str;
-            }
-            load_writer->write(std::make_pair(i, coord_y));
-        }
-
-        load_writer->finalize();
-    }
-
-    void op_load_indirect(const TensorOperand &o_tensor,
-                          const Operand       &o_dst,
-                          const Operand       &o_x,
-                          const Operand       &o_indirect_h,
-                          const Operand       &o_z,
-                          const Operand       &o_batch_idx) override
-    {
-        OperandUnpacker operands(_data->tiles, _data->arguments);
-
-        // Not const as it requires changes to 'load_writer'.
-        IVectorTile *dst   = operands.unpack(o_dst);
-        IVectorTile *x     = operands.unpack(o_x);
-        IVectorTile *y_ind = operands.unpack(o_indirect_h);
-        IVectorTile *z     = operands.unpack(o_z);
-        IVectorTile *b     = operands.unpack(o_batch_idx);
-
-        TensorOperandUnpacker tensor_operands(_data->arguments);
-        IGpuTensorArgument   *tensor      = tensor_operands.unpack(o_tensor);
-        auto                  gpu_sampler = o_tensor.sampler();
-
-        GpuTensor3dMapper mapper(tensor, gpu_sampler);
-
-        auto load_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Load);
-
-        // Initialize the constant part
-        load_writer->initialize(dst, x, z, b);
-
-        for (int i = 0; i < dst->format().h; ++i)
-        {
-            load_writer->write(std::make_pair(i, y_ind->scalar(0, i).str));
-        }
-
-        load_writer->finalize();
-    }
-
-    void op_store_immediate(const TensorOperand &tensor_name,
-                            const Operand       &src_name,
-                            const Operand       &x_name,
-                            const Operand       &y_name,
-                            const Operand       &z_name,
-                            const Operand       &batch_index_name) override
-    {
-        OperandUnpacker operands(_data->tiles, _data->arguments);
-
-        // Not const as it requires changes to 'load_writer'.
-        IVectorTile *src = operands.unpack(src_name);
-        IVectorTile *x   = operands.unpack(x_name);
-        IVectorTile *y   = operands.unpack(y_name);
-        IVectorTile *z   = operands.unpack(z_name);
-        IVectorTile *b   = operands.unpack(batch_index_name);
-
-        TensorOperandUnpacker tensor_operands(_data->arguments);
-        IGpuTensorArgument   *tensor      = tensor_operands.unpack(tensor_name);
-        auto                  gpu_sampler = tensor_name.sampler();
-
-        GpuTensor3dMapper mapper(tensor, gpu_sampler);
-
-        auto store_writer = ClLoadStoreHelperWriterFactory::create(this, mapper, GpuLoadStoreType::Store);
-
-        // Initialize the constant part
-        store_writer->initialize(src, x, z, b);
-
-        int32_t tile_h = src->format().h;
-
-        for (int m0 = tile_h - 1; m0 >= 0; m0--)
-        {
-            store_writer->write(std::make_pair(m0, y->scalar(0, 0).str + " + " + std::to_string(m0)));
-        }
-
-        store_writer->finalize();
-    }
-
-    void op_return() override
-    {
-        _data->code += "return;\n";
-    }
-
-    void util_get_indirect_buffer(const Operand       &o_dst,
-                                  const TensorOperand &o_tensor,
-                                  const Operand       &o_x,
-                                  const Operand       &o_y,
-                                  const Operand       &o_x_off,
-                                  const Operand       &o_y_off) override
-    {
-        OperandUnpacker    operands(_data->tiles, _data->arguments);
-        const IVectorTile *dst   = operands.unpack(o_dst);
-        const IVectorTile *x     = operands.unpack(o_x);
-        const IVectorTile *y     = operands.unpack(o_y);
-        const IVectorTile *x_off = operands.unpack(o_x_off);
-        const IVectorTile *y_off = operands.unpack(o_y_off);
-
-        TensorOperandUnpacker tensor_operands(_data->arguments);
-        IGpuTensorArgument   *tensor = tensor_operands.unpack(o_tensor);
-
-        assert(dst->format().w == 1);
-        assert(x->format().w == 1);
-        assert(y->format().w == 1);
-        assert(x_off->format().w == 1);
-        assert(y_off->format().w == 1);
-        assert(dst->format().dt == DataType::Int32);
-        assert(x->format().dt == DataType::Int32);
-        assert(y->format().dt == DataType::Int32);
-        assert(x_off->format().dt == DataType::Int32);
-        assert(y_off->format().dt == DataType::Int32);
-
-        const std::string width  = tensor->component(TensorComponentType::Dim1);
-        const std::string height = tensor->component(TensorComponentType::Dim2);
-        const std::string wxh    = tensor->component(TensorComponentType::Dim1xDim2);
-        /*
-        int x_s;
-        int y_s;
-        x_s = (xi_0 + x_k);
-        y_s = (yi_0 + y_k);
-        mi_0 = x_s + y_s * width + b * widthxheight;
-        mi_0 = select(-1, mi_0, x_s >= 0);
-        mi_0 = select(-1, mi_0, y_s >= 0);
-        mi_0 = select(-1, mi_0, x_s < 128);
-        mi_0 = select(-1, mi_0, y_s < 128);
-        */
-        compound_statement_begin();
-        declare_tile("_x_s", TileInfo(DataType::Int32));
-        declare_tile("_y_s", TileInfo(DataType::Int32));
-        auto x_s = operands.unpack(Operand("_x_s"));
-        auto y_s = operands.unpack(Operand("_y_s"));
-        for (int i = 0; i < dst->format().h; ++i)
-        {
-            // x_s = (xi_0 + x_k);
-            // y_s = (yi_0 + y_k);
-            _data->code += x_s->scalar(0, i).str;
-            _data->code += " = (";
-            _data->code += x->scalar(0, i).str;
-            _data->code += " + ";
-            _data->code += x_off->scalar(0, i).str;
-            _data->code += ");\n";
-            _data->code += y_s->scalar(0, i).str;
-            _data->code += " = (";
-            _data->code += y->scalar(0, i).str;
-            _data->code += " + ";
-            _data->code += y_off->scalar(0, i).str;
-            _data->code += ");\n";
-            // mi_0 = x_s + y_s * width;
-            _data->code += dst->scalar(0, i).str;
-            _data->code += " = ";
-            _data->code += x_s->scalar(0, i).str;
-            _data->code += " + ";
-            _data->code += y_s->scalar(0, i).str;
-            _data->code += " * " + width + ";\n";
-            // mi_0 = select(wxh, mi_0, x_s >= 0);
-            _data->code += dst->scalar(0, i).str;
-            _data->code += " = select(-1, ";
-            _data->code += dst->scalar(0, i).str;
-            _data->code += ", ";
-            _data->code += x_s->scalar(0, i).str;
-            _data->code += " >= 0);\n";
-            // mi_0 = select(wxh, mi_0, x_s < width);
-            _data->code += dst->scalar(0, i).str;
-            _data->code += " = select(-1, ";
-            _data->code += dst->scalar(0, i).str;
-            _data->code += ", ";
-            _data->code += x_s->scalar(0, i).str;
-            _data->code += " < ";
-            _data->code += width + ");\n";
-            // mi_0 = select(wxh, mi_0, y_s >= 0);
-            _data->code += dst->scalar(0, i).str;
-            _data->code += " = select(-1, ";
-            _data->code += dst->scalar(0, i).str;
-            _data->code += ", ";
-            _data->code += y_s->scalar(0, i).str;
-            _data->code += " >= 0);\n";
-            // mi_0 = select(wxh, mi_0, y_s < height);
-            _data->code += dst->scalar(0, i).str;
-            _data->code += " = select(-1, ";
-            _data->code += dst->scalar(0, i).str;
-            _data->code += ", ";
-            _data->code += y_s->scalar(0, i).str;
-            _data->code += " < ";
-            _data->code += height + ");\n";
-        }
-        compound_statement_end();
-    }
-
-private:
-    GpuKernelWriterDataHolder *_data{nullptr};
-    GpuKernelWriterAttribute  *_attr{nullptr};
-};
-
-/** IGpuKernelWriter factory class */
-class GpuKernelWriterFactory final
-{
-public:
-    /** Static method to call the IGpuKernelWriter class accordingly with the Gpu programming language
-     *
-     * @param[in] gpu GPU target
-     *
-     * @return IGpuKernelWriter
-     */
-    static std::unique_ptr<IGpuKernelWriter> create(GpuKernelWriterAttribute *attr, GpuKernelWriterDataHolder *x)
-    {
-        switch (x->programming_language())
-        {
-            case GpuTargetLanguage::OpenCL:
-                return std::make_unique<ClKernelWriter>(attr, x);
-            default:
-                std::cout << "Unsupported Gpu programming language" << std::endl;
-                assert(false);
-                return nullptr;
-        }
-    }
-};
-
-inline int32_t
-adjust_step(TensorSamplerFormat tensor_format, int32_t step, const TensorInfo *tensor_info_id, int32_t idx)
-{
-    auto tensor = tensor_info_id->shape;
-
-    int32_t dim[3] = {0};
-
-    switch (tensor_format)
-    {
-        case TensorSamplerFormat::C_W_H:
-            dim[0] = tensor[0];
-            dim[1] = tensor[1];
-            dim[2] = tensor[2];
-            break;
-        case TensorSamplerFormat::C_WH_1:
-            dim[0] = tensor[0];
-            dim[1] = tensor[1] * tensor[2];
-            dim[2] = 1;
-            break;
-        default:
-            std::cout << "Unsupported tensor format" << std::endl;
-            assert(false);
-            break;
-    }
-
-    return std::min(step, dim[idx]);
-}
-
-} // namespace prototype
-} // namespace ckw
-
-#endif // CKW_PROTOTYPE_SRC_PROTOTYPE_H
diff --git a/compute_kernel_writer/prototype/src/TensorInfo.cpp b/compute_kernel_writer/prototype/src/TensorInfo.cpp
deleted file mode 100644
index 561c126469..0000000000
--- a/compute_kernel_writer/prototype/src/TensorInfo.cpp
+++ /dev/null
@@ -1,77 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ckw/TensorInfo.h"
-
-namespace ckw
-{
-TensorInfo::TensorInfo(DataType dt, const TensorShape &shape, TensorDataLayout dl, int32_t id)
-    : _shape(shape), _dt(dt), _dl(dl), _id(id)
-{
-}
-
-TensorInfo &TensorInfo::shape(const TensorShape &shape)
-{
-    _shape = shape;
-    return *this;
-}
-
-TensorShape TensorInfo::shape() const
-{
-    return _shape;
-}
-
-TensorInfo &TensorInfo::data_type(DataType dt)
-{
-    _dt = dt;
-    return *this;
-}
-
-DataType TensorInfo::data_type() const
-{
-    return _dt;
-}
-
-TensorInfo &TensorInfo::data_layout(TensorDataLayout dl)
-{
-    _dl = dl;
-    return *this;
-}
-
-TensorDataLayout TensorInfo::data_layout() const
-{
-    return _dl;
-}
-
-TensorInfo &TensorInfo::id(int32_t id)
-{
-    _id = id;
-    return *this;
-}
-
-int32_t TensorInfo::id() const
-{
-    return _id;
-}
-} // namespace ckw
diff --git a/compute_kernel_writer/prototype/src/TensorOperand.cpp b/compute_kernel_writer/prototype/src/TensorOperand.cpp
deleted file mode 100644
index d1aefbbb71..0000000000
--- a/compute_kernel_writer/prototype/src/TensorOperand.cpp
+++ /dev/null
@@ -1,272 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ckw/TensorOperand.h"
-
-#include "ckw/Error.h"
-#include "ckw/Kernel.h"
-#include "ckw/TensorInfo.h"
-#include "ckw/TileOperand.h"
-
-#include "src/Prototype.h"
-
-namespace ckw
-{
-
-namespace
-{
-
-TensorComponentOperand &get_or_create_component(TensorOperand                           &tensor,
-                                                std::unique_ptr<TensorComponentOperand> &ptr,
-                                                TensorComponentType                      component)
-{
-    if (ptr == nullptr)
-    {
-        ptr = std::make_unique<TensorComponentOperand>(tensor, component);
-    }
-
-    return *ptr;
-}
-
-} // namespace
-
-// =================================================================================================
-// TensorOperand
-// =================================================================================================
-
-TensorOperand::TensorOperand(const std::string &name, const TensorInfo &info, TensorStorageType storage_type)
-    : OperandBase(name), _info(info), _storage_type(storage_type)
-{
-}
-
-prototype::Operand TensorOperand::create_impl_operand(prototype::IGpuKernelWriter *writer) const
-{
-    CKW_UNUSED(writer);
-    return {name()};
-}
-
-const TensorInfo &TensorOperand::info() const
-{
-    return _info;
-}
-
-TensorInfo &TensorOperand::info()
-{
-    return _info;
-}
-
-TensorStorageType TensorOperand::storage_type() const
-{
-    return _storage_type;
-}
-
-DataType TensorOperand::data_type() const
-{
-    return _info.data_type();
-}
-
-bool TensorOperand::is_constant() const
-{
-    return false;
-}
-
-const TileOperand &TensorOperand::tile() const
-{
-    return *_tile;
-}
-
-TileOperand &TensorOperand::tile()
-{
-    return *_tile;
-}
-
-TensorOperand &TensorOperand::tile(TileOperand &tile)
-{
-    _tile = &tile;
-    return *this;
-}
-
-const TensorTileSampler &TensorOperand::tile_sampler() const
-{
-    return _tile_sampler;
-}
-
-TensorTileSampler &TensorOperand::tile_sampler()
-{
-    return _tile_sampler;
-}
-
-TensorOperand &TensorOperand::tile_sampler(const TensorTileSampler &value)
-{
-    _tile_sampler = value;
-    return *this;
-}
-
-TensorComponentOperand &TensorOperand::stride1()
-{
-    return get_or_create_component(*this, _stride1, TensorComponentType::Stride1);
-}
-
-TensorComponentOperand &TensorOperand::stride2()
-{
-    return get_or_create_component(*this, _stride2, TensorComponentType::Stride2);
-}
-
-TensorComponentOperand &TensorOperand::stride3()
-{
-    return get_or_create_component(*this, _stride3, TensorComponentType::Stride3);
-}
-
-TensorComponentOperand &TensorOperand::stride4()
-{
-    return get_or_create_component(*this, _stride4, TensorComponentType::Stride4);
-}
-
-TensorComponentOperand &TensorOperand::dim0()
-{
-    return get_or_create_component(*this, _dim0, TensorComponentType::Dim0);
-}
-
-TensorComponentOperand &TensorOperand::dim1()
-{
-    return get_or_create_component(*this, _dim1, TensorComponentType::Dim1);
-}
-
-TensorComponentOperand &TensorOperand::dim2()
-{
-    return get_or_create_component(*this, _dim2, TensorComponentType::Dim2);
-}
-
-TensorComponentOperand &TensorOperand::dim3()
-{
-    return get_or_create_component(*this, _dim3, TensorComponentType::Dim3);
-}
-
-TensorComponentOperand &TensorOperand::dim4()
-{
-    return get_or_create_component(*this, _dim4, TensorComponentType::Dim4);
-}
-
-TensorComponentOperand &TensorOperand::dim1_dim2()
-{
-    return get_or_create_component(*this, _dim1_dim2, TensorComponentType::Dim1xDim2);
-}
-
-TensorComponentOperand &TensorOperand::dim1_dim2_dim3()
-{
-    return get_or_create_component(*this, _dim1_dim2_dim3, TensorComponentType::Dim1xDim2xDim3);
-}
-
-TensorComponentOperand &TensorOperand::offset_first_element_in_bytes()
-{
-    return get_or_create_component(*this, _offset_first_element_in_bytes, TensorComponentType::OffsetFirstElement);
-}
-
-// =================================================================================================
-// TensorComponentOperand
-// =================================================================================================
-
-TensorComponentOperand::TensorComponentOperand(TensorOperand &tensor, TensorComponentType component)
-    : TileOperand(tensor.name(), DataType::Int32), _tensor(tensor), _component(component)
-{
-}
-
-TensorOperand &TensorComponentOperand::tensor()
-{
-    return _tensor;
-}
-
-const TensorOperand &TensorComponentOperand::tensor() const
-{
-    return _tensor;
-}
-
-TensorComponentType TensorComponentOperand::component_type() const
-{
-    return _component;
-}
-
-prototype::Operand TensorComponentOperand::create_impl_operand(prototype::IGpuKernelWriter *writer) const
-{
-    CKW_UNUSED(writer);
-    prototype::OperandType type{prototype::OperandType::Unknown};
-
-    switch (_component)
-    {
-        case TensorComponentType::OffsetFirstElement:
-            type = prototype::OperandType::TensorDataOffset;
-            break;
-
-        case TensorComponentType::Stride1:
-            type = prototype::OperandType::TensorStride1;
-            break;
-
-        case TensorComponentType::Stride2:
-            type = prototype::OperandType::TensorStride2;
-            break;
-
-        case TensorComponentType::Stride3:
-            type = prototype::OperandType::TensorStride3;
-            break;
-
-        case TensorComponentType::Stride4:
-            type = prototype::OperandType::TensorStride4;
-            break;
-
-        case TensorComponentType::Dim0:
-            type = prototype::OperandType::TensorDim0;
-            break;
-
-        case TensorComponentType::Dim1:
-            type = prototype::OperandType::TensorDim1;
-            break;
-
-        case TensorComponentType::Dim2:
-            type = prototype::OperandType::TensorDim2;
-            break;
-
-        case TensorComponentType::Dim3:
-            type = prototype::OperandType::TensorDim3;
-            break;
-
-        case TensorComponentType::Dim4:
-            type = prototype::OperandType::TensorDim4;
-            break;
-
-        case TensorComponentType::Dim1xDim2:
-            type = prototype::OperandType::TensorDim1xDim2;
-            break;
-
-        case TensorComponentType::Dim1xDim2xDim3:
-            type = prototype::OperandType::TensorDim1xDim2xDim3;
-            break;
-
-        default:
-            CKW_ASSERT(false);
-    }
-
-    return prototype::Operand(name(), type);
-}
-
-} // namespace ckw
diff --git a/compute_kernel_writer/prototype/src/TensorTileSampler.cpp b/compute_kernel_writer/prototype/src/TensorTileSampler.cpp
deleted file mode 100644
index bf9f946ce8..0000000000
--- a/compute_kernel_writer/prototype/src/TensorTileSampler.cpp
+++ /dev/null
@@ -1,191 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ckw/TensorTileSampler.h"
-
-#include "ckw/TileOperand.h"
-#include "ckw/types/TensorSamplerTypes.h"
-
-namespace ckw
-{
-
-TensorTileSampler::TensorTileSampler()
-{
-}
-
-TensorTileSampler::TensorTileSampler(TileOperand              &x,
-                                     TileOperand              &y,
-                                     TileOperand              &z,
-                                     TileOperand              &b,
-                                     TensorSamplerFormat       format,
-                                     TensorSamplerAddressModeX address_mode_x,
-                                     TensorSamplerAddressModeY address_mode_y,
-                                     TensorSamplerAddressModeZ address_mode_z)
-    : _x(&x),
-      _y(&y),
-      _z(&z),
-      _b(&b),
-      _height(0),
-      _width(0),
-      _format(format),
-      _address_mode_x(address_mode_x),
-      _address_mode_y(address_mode_y),
-      _address_mode_z(address_mode_z)
-{
-}
-
-TensorTileSampler::TensorTileSampler(TileOperand              &x,
-                                     TileOperand              &y,
-                                     TileOperand              &z,
-                                     TileOperand              &b,
-                                     int32_t                   height,
-                                     int32_t                   width,
-                                     TensorSamplerFormat       format,
-                                     TensorSamplerAddressModeX address_mode_x,
-                                     TensorSamplerAddressModeY address_mode_y,
-                                     TensorSamplerAddressModeZ address_mode_z)
-    : _x(&x),
-      _y(&y),
-      _z(&z),
-      _b(&b),
-      _height(height),
-      _width(width),
-      _format(format),
-      _address_mode_x(address_mode_x),
-      _address_mode_y(address_mode_y),
-      _address_mode_z(address_mode_z)
-{
-}
-
-const TileOperand &TensorTileSampler::x() const
-{
-    return *_x;
-}
-
-TensorTileSampler &TensorTileSampler::x(TileOperand &x)
-{
-    _x = &x;
-    return *this;
-}
-
-const TileOperand &TensorTileSampler::y() const
-{
-    return *_y;
-}
-
-TensorTileSampler &TensorTileSampler::y(TileOperand &y)
-{
-    _y = &y;
-    return *this;
-}
-
-const TileOperand &TensorTileSampler::z() const
-{
-    return *_z;
-}
-
-TensorTileSampler &TensorTileSampler::z(TileOperand &z)
-{
-    _z = &z;
-    return *this;
-}
-
-const TileOperand &TensorTileSampler::b() const
-{
-    return *_b;
-}
-
-TensorTileSampler &TensorTileSampler::b(TileOperand &b)
-{
-    _b = &b;
-    return *this;
-}
-
-int32_t TensorTileSampler::width() const
-{
-    return _width;
-}
-
-TensorTileSampler &TensorTileSampler::width(int32_t width)
-{
-    _width = width;
-    return *this;
-}
-
-int32_t TensorTileSampler::height() const
-{
-    return _height;
-}
-
-TensorTileSampler &TensorTileSampler::height(int32_t height)
-{
-    _height = height;
-    return *this;
-}
-
-TensorSamplerFormat TensorTileSampler::format() const
-{
-    return _format;
-}
-
-TensorTileSampler &TensorTileSampler::format(TensorSamplerFormat format)
-{
-    _format = format;
-    return *this;
-}
-
-TensorSamplerAddressModeX TensorTileSampler::address_mode_x() const
-{
-    return _address_mode_x;
-}
-
-TensorTileSampler &TensorTileSampler::address_mode_x(TensorSamplerAddressModeX address_mode_x)
-{
-    _address_mode_x = address_mode_x;
-    return *this;
-}
-
-TensorSamplerAddressModeY TensorTileSampler::address_mode_y() const
-{
-    return _address_mode_y;
-}
-
-TensorTileSampler &TensorTileSampler::address_mode_y(TensorSamplerAddressModeY address_mode_y)
-{
-    _address_mode_y = address_mode_y;
-    return *this;
-}
-
-TensorSamplerAddressModeZ TensorTileSampler::address_mode_z() const
-{
-    return _address_mode_z;
-}
-
-TensorTileSampler &TensorTileSampler::address_mode_z(TensorSamplerAddressModeZ address_mode_z)
-{
-    _address_mode_z = address_mode_z;
-    return *this;
-}
-
-} // namespace ckw
diff --git a/compute_kernel_writer/prototype/src/TileInfo.cpp b/compute_kernel_writer/prototype/src/TileInfo.cpp
deleted file mode 100644
index 273266eedc..0000000000
--- a/compute_kernel_writer/prototype/src/TileInfo.cpp
+++ /dev/null
@@ -1,73 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ckw/TileInfo.h"
-
-namespace ckw
-{
-TileInfo::TileInfo(DataType dt) : _dt(dt), _shape({{1, 1}})
-{
-}
-
-TileInfo::TileInfo(DataType dt, int32_t w) : _dt(dt), _shape({{w, 1}})
-{
-}
-
-TileInfo::TileInfo(DataType dt, int32_t h, int32_t w) : _dt(dt), _shape({{w, h}})
-{
-}
-
-TileInfo &TileInfo::width(int32_t w)
-{
-    _shape[kTileWidthIdx] = w;
-    return *this;
-}
-
-int32_t TileInfo::width() const
-{
-    return _shape[kTileWidthIdx];
-}
-
-TileInfo &TileInfo::height(int32_t h)
-{
-    _shape[kTileHeightIdx] = h;
-    return *this;
-}
-
-int32_t TileInfo::height() const
-{
-    return _shape[kTileHeightIdx];
-}
-
-TileInfo &TileInfo::data_type(DataType dt)
-{
-    _dt = dt;
-    return *this;
-}
-
-DataType TileInfo::data_type() const
-{
-    return _dt;
-}
-} // namespace ckw
diff --git a/compute_kernel_writer/prototype/src/TileOperand.cpp b/compute_kernel_writer/prototype/src/TileOperand.cpp
deleted file mode 100644
index e09c833d96..0000000000
--- a/compute_kernel_writer/prototype/src/TileOperand.cpp
+++ /dev/null
@@ -1,135 +0,0 @@
-/*
- * Copyright (c) 2023 Arm Limited.
- *
- * SPDX-License-Identifier: MIT
- *
- * Permission is hereby granted, free of charge, to any person obtaining a copy
- * of this software and associated documentation files (the "Software"), to
- * deal in the Software without restriction, including without limitation the
- * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
- * sell copies of the Software, and to permit persons to whom the Software is
- * furnished to do so, subject to the following conditions:
- *
- * The above copyright notice and this permission notice shall be included in all
- * copies or substantial portions of the Software.
- *
- * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
- * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
- * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
- * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
- * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
- * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
- * SOFTWARE.
- */
-
-#include "ckw/TileOperand.h"
-
-#include "ckw/Error.h"
-
-#include "src/Prototype.h"
-
-namespace ckw
-{
-
-TileOperand::TileOperand(const std::string &name, const TileInfo &info)
-    : OperandBase(name), _info(info), _value{std::vector<std::string>{"0"}}, _constant(false)
-{
-}
-
-TileOperand::TileOperand(const std::string &name, DataType data_type)
-    : OperandBase(name), _info(TileInfo{data_type}), _value{std::vector<std::string>{"0"}}, _constant(false)
-{
-}
-
-TileOperand::TileOperand(const std::string &name, int32_t value)
-    : OperandBase(name),
-      _info(TileInfo{DataType::Int32}),
-      _value{std::vector<std::string>{std::to_string(value)}},
-      _constant(true)
-{
-}
-
-TileOperand::TileOperand(const std::string &name, float value)
-    : OperandBase(name),
-      _info(TileInfo{DataType::Fp32}),
-      _value{std::vector<std::string>{std::to_string(value)}},
-      _constant(true)
-{
-}
-
-TileOperand::TileOperand(const std::string &name, const TileContainer &vals, DataType dt)
-    : OperandBase(name),
-      _info(TileInfo{dt, static_cast<int32_t>(vals.size()), static_cast<int32_t>(vals[0].size())}),
-      _value(vals),
-      _constant(true)
-{
-}
-
-prototype::Operand TileOperand::create_impl_operand(prototype::IGpuKernelWriter *writer) const
-{
-    CKW_UNUSED(writer);
-
-    if (_constant)
-    {
-        if (is_scalar())
-        {
-            switch (_info.data_type())
-            {
-                case DataType::Int32:
-                    return prototype::Operand(_value[0][0], prototype::OperandType::ScalarInt32);
-
-                case DataType::Fp32:
-                    return prototype::Operand(_value[0][0], prototype::OperandType::ScalarFp32);
-
-                case DataType::Fp16:
-                    return prototype::Operand(_value[0][0], prototype::OperandType::ScalarFp16);
-
-                default:
-                    CKW_ASSERT(false);
-            }
-        }
-        else
-        {
-            return prototype::Operand(name());
-        }
-    }
-    else
-    {
-        return prototype::Operand(name(), prototype::OperandType::Tile);
-    }
-}
-
-const TileInfo &TileOperand::tile_info() const
-{
-    return _info;
-}
-
-DataType TileOperand::data_type() const
-{
-    return _info.data_type();
-}
-
-bool TileOperand::is_constant() const
-{
-    return _constant;
-}
-
-bool TileOperand::is_scalar() const
-{
-    return _info.width() == 1 && _info.height() == 1;
-}
-
-std::string TileOperand::scalar_value() const
-{
-    CKW_ASSERT(is_scalar());
-    CKW_ASSERT(is_constant());
-
-    return _value[0][0];
-}
-
-const TileContainer &TileOperand::value() const
-{
-    return _value;
-}
-
-} // namespace ckw
author	Gunes Bayir <gunes.bayir@arm.com>	2024-02-07 15:34:45 +0000
committer	Gunes Bayir <gunes.bayir@arm.com>	2024-02-09 15:59:45 +0000
commit	0ee13afc4429411de9a05ba4c2ff8a580784b568 (patch)
tree	c9ee1acf684d52b92ffb7500b0b65eee8377ce45 /compute_kernel_writer
parent	a3e1b50588b89a2c0c67da2679728a422fc16402 (diff)
download	ComputeLibrary-0ee13afc4429411de9a05ba4c2ff8a580784b568.tar.gz