From acea4071a7f457bab696dc3c895ba47d60345541 Mon Sep 17 00:00:00 2001 From: Nikolaj Jensen Date: Mon, 3 Jul 2023 09:44:42 +0100 Subject: Fix code formatting in CKW Signed-off-by: Nikolaj Jensen Change-Id: I8064b345c1efd243f8bded12ed5d561afe7c339a Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9854 Benchmark: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Jakub Sujak Comments-Addressed: Arm Jenkins --- compute_kernel_writer/README.md | 32 +- compute_kernel_writer/include/ckw/Error.h | 19 +- compute_kernel_writer/include/ckw/TensorInfo.h | 10 +- compute_kernel_writer/include/ckw/TileInfo.h | 8 + .../prototype/examples/add_exp_store.cpp | 2 +- .../examples/common/ExampleComponentArgument.cpp | 3 +- .../examples/common/ExampleComponentArgument.h | 7 +- .../examples/common/ExampleKernelWriter.cpp | 3 +- .../examples/common/ExampleScopedKernelWriter.h | 2 +- .../prototype/include/ckw/KernelWriter.h | 1 + .../prototype/include/ckw/OperandBase.h | 1 + .../prototype/include/ckw/TensorInfo.h | 10 +- .../prototype/include/ckw/TensorOperand.h | 2 +- .../prototype/include/ckw/TileInfo.h | 8 + compute_kernel_writer/prototype/src/Prototype.h | 1333 +++++++++++--------- compute_kernel_writer/prototype/src/TileInfo.cpp | 6 +- .../prototype/src/TileOperand.cpp | 3 +- compute_kernel_writer/src/Error.cpp | 3 +- compute_kernel_writer/src/Helpers.h | 4 +- compute_kernel_writer/src/ITile.h | 20 +- compute_kernel_writer/src/TensorUtils.h | 2 +- compute_kernel_writer/src/TileInfo.cpp | 6 +- compute_kernel_writer/src/cl/CLConstantTile.cpp | 2 +- compute_kernel_writer/src/cl/CLConstantTile.h | 6 +- compute_kernel_writer/src/cl/CLTile.cpp | 5 +- compute_kernel_writer/src/cl/CLTile.h | 6 +- compute_kernel_writer/src/cl/ICLTile.cpp | 2 +- compute_kernel_writer/validation/Validation.cpp | 16 +- .../validation/tests/CLConstantTileTest.hpp | 74 +- .../validation/tests/CLTileTest.hpp | 76 +- .../validation/tests/TensorBitMaskTest.hpp | 20 +- .../validation/tests/UtilsTest.hpp | 15 +- .../validation/tests/common/Common.h | 28 +- 33 files changed, 974 insertions(+), 761 deletions(-) diff --git a/compute_kernel_writer/README.md b/compute_kernel_writer/README.md index 9a920b7882..2c7636ca9f 100644 --- a/compute_kernel_writer/README.md +++ b/compute_kernel_writer/README.md @@ -2,7 +2,7 @@ Compute Kernel Writer is a tile-based, just-in-time code writer for deep learning and computer vision applications. This tool offers a C++ interface to allow developers to write functions without a return type (called "kernels") -using their preferred programming language (at the moment, only OpenCL is supported). +using their preferred programming language (at the moment, only OpenCL is supported). The library is specifically designed to be lightweight and to offer an intuitive API for efficient code writing. ## Getting started @@ -12,7 +12,8 @@ The following subsections show you how to do this. ### Dependencies -This project requires the following dependencies, obtainable via your preferred package manager, to be installed and available on your system. +This project requires the following dependencies, obtainable via your preferred package manager, to be installed and +available on your system. 
* `build-essential` * `cmake >= 3.14` @@ -20,8 +21,10 @@ This project requires the following dependencies, obtainable via your preferred In addition, the guide makes use of the following toolchains: -* (Optional) `Arm GNU toolchain` available to download from the [Arm Developer](https://developer.arm.com/downloads/-/arm-gnu-toolchain-downloads) website -* (Optional) `Android NDK toolset` available to download from the [Android Developer](https://developer.android.com/ndk/downloads/index.html) website +* (Optional) `Arm GNU toolchain` available to download from + the [Arm Developer](https://developer.arm.com/downloads/-/arm-gnu-toolchain-downloads) website +* (Optional) `Android NDK toolset` available to download from + the [Android Developer](https://developer.android.com/ndk/downloads/index.html) website ### Building and running tests @@ -35,13 +38,16 @@ CXX=g++ cmake -G Ninja -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release -DCKW_E cmake --build . ``` -The preceding commands build the library in release mode (`-DCMAKE_BUILD_TYPE=Release`) and targets OpenCL code generation (`-DCKW_ENABLE_OPENCL=ON`). -In addition, code assertions are enabled (`-DCKW_ENABLE_ASSERTS=ON`) and the test suite is built (`-DCKW_BUILD_TESTING=ON`). +The preceding commands build the library in release mode (`-DCMAKE_BUILD_TYPE=Release`) and targets OpenCL code +generation (`-DCKW_ENABLE_OPENCL=ON`). +In addition, code assertions are enabled (`-DCKW_ENABLE_ASSERTS=ON`) and the test suite is +built (`-DCKW_BUILD_TESTING=ON`). Alternatively, choose to build a static instead of a shared library by setting `-DBUILD_SHARED_LIBS=OFF`. #### Cross-compile to Linux AArch64 -The Arm GNU toolchain can be used to cross-compile the project to a Linux system with an AArch64 processor, like a Raspberry Pi, using an x86_64 Linux host machine. +The Arm GNU toolchain can be used to cross-compile the project to a Linux system with an AArch64 processor, like a +Raspberry Pi, using an x86_64 Linux host machine. ```shell mkdir -p build && cd build @@ -49,11 +55,13 @@ CXX=aarch64-none-linux-gnu-g++ cmake -G Ninja -DBUILD_SHARED_LIBS=ON -DCMAKE_BUI cmake --build . ``` -The build configuration is identical to the previous step but now requires specifying the target triple in the CXX compiler (`CXX=aarch64-none-linux-gnu-g++`) to generate binaries for the target platform. +The build configuration is identical to the previous step but now requires specifying the target triple in the CXX +compiler (`CXX=aarch64-none-linux-gnu-g++`) to generate binaries for the target platform. #### Cross-compile to Android AArch64 -Cross-compiling for Android systems requires the Android NDK toolset. The downloaded NDK contains the toolchain file necessary for cross-compiling the project. +Cross-compiling for Android systems requires the Android NDK toolset. The downloaded NDK contains the toolchain file +necessary for cross-compiling the project. ```shell mkdir -p build && cd build @@ -61,7 +69,8 @@ cmake -G Ninja -DBUILD_SHARED_LIBS=ON -DCMAKE_BUILD_TYPE=Release -DCKW_ENABLE_OP cmake --build . ``` -This build re-uses the same build configuration as before, but this time does not require specifying the CXX compiler as this (and other target-specific information) is handled by the toolchain file (`-DCMAKE_TOOLCHAIN_FILE`). +This build re-uses the same build configuration as before, but this time does not require specifying the CXX compiler as +this (and other target-specific information) is handled by the toolchain file (`-DCMAKE_TOOLCHAIN_FILE`). 
#### Run the validation test suite @@ -73,7 +82,8 @@ Confirm the project has been built successfully by running the validation test s ### List of build options -This project can be configured with the following build options. Enable options by passing them to the CMake command, preceded with `-D`. +This project can be configured with the following build options. Enable options by passing them to the CMake command, +preceded with `-D`. | Option | Description | |:---------------------|:------------------------------------------------------------------------------------------------------------------------------------------| diff --git a/compute_kernel_writer/include/ckw/Error.h b/compute_kernel_writer/include/ckw/Error.h index 996893823e..2793791802 100644 --- a/compute_kernel_writer/include/ckw/Error.h +++ b/compute_kernel_writer/include/ckw/Error.h @@ -24,8 +24,8 @@ #ifndef COMPUTE_KERNEL_WRITER_INCLUDE_CKW_ERROR_H #define COMPUTE_KERNEL_WRITER_INCLUDE_CKW_ERROR_H -#include #include +#include namespace ckw { @@ -38,19 +38,20 @@ namespace ckw * * @return status containing the error */ -std::string create_error_msg(const std::string &file, const std::string &func, const std::string &line, const std::string &msg); +std::string +create_error_msg(const std::string &file, const std::string &func, const std::string &line, const std::string &msg); /** Print the given message then throw an std::runtime_error. * * @param[in] msg Message to display. */ -#define COMPUTE_KERNEL_WRITER_ERROR_ON_MSG(msg) \ - do \ - { \ - const std::string arg0(__FILE__); \ - const std::string arg1(__func__); \ - const std::string arg2(std::to_string(__LINE__)); \ - const std::string arg3(msg); \ +#define COMPUTE_KERNEL_WRITER_ERROR_ON_MSG(msg) \ + do \ + { \ + const std::string arg0(__FILE__); \ + const std::string arg1(__func__); \ + const std::string arg2(std::to_string(__LINE__)); \ + const std::string arg3(msg); \ std::runtime_error(create_error_msg(arg0, arg1, arg2, arg3)); \ } while(false) diff --git a/compute_kernel_writer/include/ckw/TensorInfo.h b/compute_kernel_writer/include/ckw/TensorInfo.h index 44846bc94c..41abe60f35 100644 --- a/compute_kernel_writer/include/ckw/TensorInfo.h +++ b/compute_kernel_writer/include/ckw/TensorInfo.h @@ -117,20 +117,28 @@ public: * - less than 0: bind a virtual tensor (tile) */ TensorInfo(DataType dt, const TensorShape &shape, TensorDataLayout dl, int32_t id); + /** Set shape */ TensorInfo &shape(const TensorShape &shape); + /** Get shape */ TensorShape shape() const; + /** Set data type */ TensorInfo &data_type(DataType dt); + /** Get data type */ DataType data_type() const; + /** Set data layout */ TensorInfo &data_layout(TensorDataLayout dl); + /** Get data layout */ TensorDataLayout data_layout() const; + /** Set id */ TensorInfo &id(int32_t id); + /** Get layout */ int32_t id() const; @@ -140,6 +148,6 @@ private: TensorDataLayout _dl{ TensorDataLayout::Unknown }; int32_t _id{ -1 }; }; -} // namespace kw +} // namespace ckw #endif /* COMPUTE_KERNEL_WRITER_INCLUDE_CKW_TENSORINFO_H */ diff --git a/compute_kernel_writer/include/ckw/TileInfo.h b/compute_kernel_writer/include/ckw/TileInfo.h index 86a4b441b7..293a90fb94 100644 --- a/compute_kernel_writer/include/ckw/TileInfo.h +++ b/compute_kernel_writer/include/ckw/TileInfo.h @@ -48,12 +48,14 @@ public: * @param[in] dt Tile data type */ TileInfo(DataType dt); + /** Constructor used to initialize a vector with a given data type and vector length. 
* * @param[in] dt Tile data type * @param[in] w Tile width (or vector length) */ TileInfo(DataType dt, int32_t w); + /** Constructor used to initialize a tile with a given data type and tile sizes. * * @param[in] dt Tile data type @@ -61,16 +63,22 @@ public: * @param[in] w Tile width */ TileInfo(DataType dt, int32_t h, int32_t w); + /** Set width */ TileInfo &width(int32_t w); + /** Get width */ int32_t width() const; + /** Set height */ TileInfo &height(int32_t h); + /** Get height */ int32_t height() const; + /** Set data type */ TileInfo &data_type(DataType dt); + /** Get data type */ DataType data_type() const; diff --git a/compute_kernel_writer/prototype/examples/add_exp_store.cpp b/compute_kernel_writer/prototype/examples/add_exp_store.cpp index 9ee21957f1..a9be0495ec 100644 --- a/compute_kernel_writer/prototype/examples/add_exp_store.cpp +++ b/compute_kernel_writer/prototype/examples/add_exp_store.cpp @@ -155,7 +155,7 @@ void op_store(ExampleScopedKernelWriter writer, std::vectortensor(); const auto tile_name = tensor.name() + "_tile"; - auto &tile = declare_tile(tile_name.c_str(), ckw::TileInfo(tensor.data_type(), sampler.height(), sampler.width())); + auto &tile = declare_tile(tile_name.c_str(), + ckw::TileInfo(tensor.data_type(), sampler.height(), sampler.width())); op_load(tile, tensor, sampler); diff --git a/compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.h b/compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.h index 1aa0242c51..4655b1897e 100644 --- a/compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.h +++ b/compute_kernel_writer/prototype/examples/common/ExampleScopedKernelWriter.h @@ -56,7 +56,7 @@ public: private: ExampleKernelWriter *_writer; - int32_t _parent_id_space; + int32_t _parent_id_space; }; #endif // CKW_PROTOTYPE_EXAMPLES_COMMON_EXAMPLESCOPEDKERNELWRITER_H diff --git a/compute_kernel_writer/prototype/include/ckw/KernelWriter.h b/compute_kernel_writer/prototype/include/ckw/KernelWriter.h index a2778a9485..3b1539116a 100644 --- a/compute_kernel_writer/prototype/include/ckw/KernelWriter.h +++ b/compute_kernel_writer/prototype/include/ckw/KernelWriter.h @@ -39,6 +39,7 @@ namespace ckw namespace prototype { struct GpuKernelWriterAttribute; + class IGpuKernelWriter; } // namespace prototype diff --git a/compute_kernel_writer/prototype/include/ckw/OperandBase.h b/compute_kernel_writer/prototype/include/ckw/OperandBase.h index f4825e16a7..a9e313fc0a 100644 --- a/compute_kernel_writer/prototype/include/ckw/OperandBase.h +++ b/compute_kernel_writer/prototype/include/ckw/OperandBase.h @@ -33,6 +33,7 @@ namespace ckw namespace prototype { class IGpuKernelWriter; + class Operand; } // namespace prototype diff --git a/compute_kernel_writer/prototype/include/ckw/TensorInfo.h b/compute_kernel_writer/prototype/include/ckw/TensorInfo.h index 00bb60a444..807158896b 100644 --- a/compute_kernel_writer/prototype/include/ckw/TensorInfo.h +++ b/compute_kernel_writer/prototype/include/ckw/TensorInfo.h @@ -117,20 +117,28 @@ public: * - less than 0: bind a virtual tensor (tile) */ TensorInfo(DataType dt, const TensorShape &shape, TensorDataLayout dl, int32_t id); + /** Set shape */ TensorInfo &shape(const TensorShape &shape); + /** Get shape */ TensorShape shape() const; + /** Set data type */ TensorInfo &data_type(DataType dt); + /** Get data type */ DataType data_type() const; + /** Set data layout */ TensorInfo &data_layout(TensorDataLayout dl); + /** Get data layout */ TensorDataLayout data_layout() const; + /** 
Set id */ TensorInfo &id(int32_t id); + /** Get layout */ int32_t id() const; @@ -140,6 +148,6 @@ private: TensorDataLayout _dl{ TensorDataLayout::Unknown }; int32_t _id{ -1 }; }; -} // namespace kw +} // namespace ckw #endif /* CKW_PROTOTYPE_INCLUDE_CKW_TENSORINFO_H */ diff --git a/compute_kernel_writer/prototype/include/ckw/TensorOperand.h b/compute_kernel_writer/prototype/include/ckw/TensorOperand.h index 2fc5044d1c..7a663f095b 100644 --- a/compute_kernel_writer/prototype/include/ckw/TensorOperand.h +++ b/compute_kernel_writer/prototype/include/ckw/TensorOperand.h @@ -134,7 +134,7 @@ public: private: TensorInfo _info; - TileOperand *_tile{ nullptr }; + TileOperand *_tile{ nullptr }; TensorTileSampler _tile_sampler{}; ::std::unique_ptr _stride1{ nullptr }; diff --git a/compute_kernel_writer/prototype/include/ckw/TileInfo.h b/compute_kernel_writer/prototype/include/ckw/TileInfo.h index 8fba8bb827..c60880dcd1 100644 --- a/compute_kernel_writer/prototype/include/ckw/TileInfo.h +++ b/compute_kernel_writer/prototype/include/ckw/TileInfo.h @@ -48,12 +48,14 @@ public: * @param[in] dt Tile data type */ TileInfo(DataType dt); + /** Constructor used to initialize a vector with a given data type and vector length. * * @param[in] dt Tile data type * @param[in] w Tile width (or vector length) */ TileInfo(DataType dt, int32_t w); + /** Constructor used to initialize a tile with a given data type and tile sizes. * * @param[in] dt Tile data type @@ -61,16 +63,22 @@ public: * @param[in] w Tile width */ TileInfo(DataType dt, int32_t h, int32_t w); + /** Set width */ TileInfo &width(int32_t w); + /** Get width */ int32_t width() const; + /** Set height */ TileInfo &height(int32_t h); + /** Get height */ int32_t height() const; + /** Set data type */ TileInfo &data_type(DataType dt); + /** Get data type */ DataType data_type() const; diff --git a/compute_kernel_writer/prototype/src/Prototype.h b/compute_kernel_writer/prototype/src/Prototype.h index b17481537f..fdb4ab1bab 100644 --- a/compute_kernel_writer/prototype/src/Prototype.h +++ b/compute_kernel_writer/prototype/src/Prototype.h @@ -25,27 +25,28 @@ #ifndef CKW_PROTOTYPE_SRC_PROTOTYPE_H #define CKW_PROTOTYPE_SRC_PROTOTYPE_H -#include -#include -#include -#include // int32_t -#include // cout (to be removed) -#include // assert (to be removed) -#include +#include +#include +#include // assert (to be removed) #include #include +#include // int32_t +#include // cout (to be removed) +#include #include -#include -#include #include +#include +#include +#include -#include "ckw/Types.h" -#include "ckw/TensorInfo.h" #include "ckw/Error.h" +#include "ckw/TensorInfo.h" +#include "ckw/Types.h" namespace ckw { -namespace prototype { +namespace prototype +{ // Dummy data structure for Size2D using Size2D = std::vector; @@ -62,8 +63,8 @@ enum class ComponentType : int32_t enum class GpuCompilationSpeed { - Fast = 0x00, // fast compilation may increase the latency of the network - Slow = 0x01 // slow compilation may decrease the latency of the network + Fast = 0x00, // fast compilation may increase the latency of the network + Slow = 0x01 // slow compilation may decrease the latency of the network }; enum class GpuExtensions @@ -76,16 +77,16 @@ enum class GpuExtensions struct TensorInfo { - TensorShape shape { {0} }; - DataType data_type { DataType::Unknown }; - TensorDataLayout data_layout { TensorDataLayout::Nhwc }; - int32_t id { -1 }; + TensorShape shape{ { 0 } }; + DataType data_type{ DataType::Unknown }; + TensorDataLayout data_layout{ TensorDataLayout::Nhwc }; + 
int32_t id{ -1 }; }; struct ComponentAttribute { - GpuCompilationSpeed compilation_speed {GpuCompilationSpeed::Fast}; - bool overwrite_tile { true }; + GpuCompilationSpeed compilation_speed{ GpuCompilationSpeed::Fast }; + bool overwrite_tile{ true }; }; inline std::string data_type_to_cl_type(DataType dt) @@ -151,7 +152,7 @@ inline int32_t width_to_cl_vector_size(int32_t width) inline std::string get_cl_data_type(DataType dt, int32_t width) { std::string data_type; - int32_t w = width_to_cl_vector_size(width); + int32_t w = width_to_cl_vector_size(width); data_type += data_type_to_cl_type(dt); if(w != 1) { @@ -174,16 +175,31 @@ inline std::string to_opencl_store(int32_t vector_length) struct TileInfo { - TileInfo() {} - TileInfo(DataType dt) : dt(dt), w(1), h(1) {} - TileInfo(DataType dt, int32_t width) : dt(dt), w(width), h(1) {} - TileInfo(DataType dt, int32_t width, int32_t height) : dt(dt), w(width), h(height) {} + TileInfo() + { + } + + TileInfo(DataType dt) + : dt(dt), w(1), h(1) + { + } + + TileInfo(DataType dt, int32_t width) + : dt(dt), w(width), h(1) + { + } + + TileInfo(DataType dt, int32_t width, int32_t height) + : dt(dt), w(width), h(height) + { + } + DataType dt{ DataType::Unknown }; // Data type of the tile - int32_t w{ 0 }; // Width (i.e. c0 - portion of the channels) - int32_t h{ 0 }; // Height (i.e. s0 - portion of the spatial dimensions) + int32_t w{ 0 }; // Width (i.e. c0 - portion of the channels) + int32_t h{ 0 }; // Height (i.e. s0 - portion of the spatial dimensions) }; -inline std::ostream& operator << (std::ostream& o, const TileInfo& a) +inline std::ostream &operator<<(std::ostream &o, const TileInfo &a) { o << a.w << " x " << a.h; return o; @@ -191,15 +207,15 @@ inline std::ostream& operator << (std::ostream& o, const TileInfo& a) struct DataTypeAsString { - std::string str { "" }; - DataType dt { DataType::Unknown }; - int32_t size { 1 }; + std::string str{ "" }; + DataType dt{ DataType::Unknown }; + int32_t size{ 1 }; }; struct ValueAsString { - std::string str { "" }; - DataTypeAsString type { }; + std::string str{ "" }; + DataTypeAsString type{}; }; // https://stackoverflow.com/questions/51515378/storing-and-accessing-tile-properties-in-c @@ -208,6 +224,7 @@ class IScalarTile { public: virtual ~IScalarTile() = default; + /** Method to get the scalar variable from a tile * @param[in] x X coordinate on the width of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge * @param[in] y Y coordinate on the height of the tile. If out-of-bound, the coordinate is clamped to the nearest valid edge @@ -215,11 +232,13 @@ public: * @return the scalar variable as a string */ virtual ValueAsString scalar(int32_t x, int32_t y) const = 0; + /** Method to get the list of underlying variable names used by the tile * * @return the list of variable names */ virtual std::vector underlying_source_variables() const = 0; + /** Method to get the name of the tile. 
* * @return the name of the tile @@ -228,6 +247,7 @@ public: { return _basename; } + /** Method to get the tile format * * @return the format @@ -236,19 +256,22 @@ public: { return _format; } + /** Method to know whether the tile is assignable or not (constant) * * @return true if the tile is assignable */ virtual bool is_assignable() const = 0; + /** Method to know whether the tile needs to be declared * * @return true if the tile needs to be declared in the code before being used */ virtual bool need_declaration() const = 0; + protected: - TileInfo _format { }; // Tile format - std::string _basename { "" }; // Tile name + TileInfo _format{}; // Tile format + std::string _basename{ "" }; // Tile name }; // A tile is a collection of variables used to express a 2D data. The variables are vectors in the GPU context. @@ -257,6 +280,7 @@ class IVectorTile : public IScalarTile { public: virtual ~IVectorTile() = default; + /** Method to get the vector variable from a tile. A vector is an ordered homogeneous collection of two or more scalars. * The user can query the list of supported width for the vectors through preferred_vector_sizes(). * @@ -265,6 +289,7 @@ public: * @return the vector variable as a string */ virtual ValueAsString vector(int32_t y) const = 0; + /** Method to get a vector variable from a tile. A vector is an ordered homogeneous collection of two or more scalars. * * @return the vector variable as a string @@ -280,9 +305,9 @@ public: class ClTile : public IVectorTile { public: - ClTile(const std::string& name, TileInfo format) + ClTile(const std::string &name, TileInfo format) { - _format = format; + _format = format; _basename = name; } @@ -373,7 +398,6 @@ private: if(_format.h == 1) { return var_name; - } else { @@ -542,26 +566,26 @@ enum class TensorComponentType : int32_t enum class TensorComponent : int32_t { - Unknown = 0x00000000, - OffsetFirstElement = 0x00000100, - Stride1 = 0x00001001, - Stride2 = 0x00001002, - Stride3 = 0x00001003, - Stride4 = 0x00001004, - Dim0 = 0x00010000, - Dim1 = 0x00010001, - Dim2 = 0x00010002, - Dim3 = 0x00010003, - Dim4 = 0x00010004, - C = 0x00010000, // Dim0 - W = 0x00010001, // Dim1 - H = 0x00010002, // Dim2 - D = 0x00010003, - N = 0x00010004, - Dim1xDim2 = 0x00100021, - Dim1xDim2xDim3 = 0x00100321, - WxH = 0x00100021, - WxHxD = 0x00100321 + Unknown = 0x00000000, + OffsetFirstElement = 0x00000100, + Stride1 = 0x00001001, + Stride2 = 0x00001002, + Stride3 = 0x00001003, + Stride4 = 0x00001004, + Dim0 = 0x00010000, + Dim1 = 0x00010001, + Dim2 = 0x00010002, + Dim3 = 0x00010003, + Dim4 = 0x00010004, + C = 0x00010000, // Dim0 + W = 0x00010001, // Dim1 + H = 0x00010002, // Dim2 + D = 0x00010003, + N = 0x00010004, + Dim1xDim2 = 0x00100021, + Dim1xDim2xDim3 = 0x00100321, + WxH = 0x00100021, + WxHxD = 0x00100321 }; inline std::string to_string(TensorComponent x) @@ -569,33 +593,33 @@ inline std::string to_string(TensorComponent x) switch(x) { case TensorComponent::Unknown: - return "Unknown"; + return "Unknown"; case TensorComponent::OffsetFirstElement: - return "OffsetFirstElement"; + return "OffsetFirstElement"; case TensorComponent::Stride1: - return "Stride1"; + return "Stride1"; case TensorComponent::Stride2: - return "Stride2"; + return "Stride2"; case TensorComponent::Stride3: - return "Stride3"; + return "Stride3"; case TensorComponent::Stride4: - return "Stride4"; + return "Stride4"; case TensorComponent::Dim0: - return "Dim0"; + return "Dim0"; case TensorComponent::Dim1: - return "Dim1"; + return "Dim1"; case TensorComponent::Dim2: - return 
"Dim2"; + return "Dim2"; case TensorComponent::Dim3: - return "Dim3"; + return "Dim3"; case TensorComponent::Dim4: - return "Dim4"; + return "Dim4"; case TensorComponent::Dim1xDim2: - return "Dim1xDim2"; + return "Dim1xDim2"; case TensorComponent::Dim1xDim2xDim3: - return "Dim1xDim2xDim3"; + return "Dim1xDim2xDim3"; default: - assert(false); + assert(false); } } @@ -603,6 +627,7 @@ class ITensorArgument { public: virtual ~ITensorArgument() = default; + /** Method to get the tensor component as a string * * @param[in] x tensor component to query @@ -610,21 +635,25 @@ public: * @return the tensor component as a string */ virtual std::string component(TensorComponent x) = 0; + /** Method to get the tensor component type declaration as a string * * @return the tensor component type declaration as a string */ virtual std::string component_type_declaration() const = 0; + /** Method to get the tensor component data type * * @return the tensor component data type */ virtual DataType component_data_type() const = 0; + /** Method to get the tensor component declarations * * @return a vector containing the tensor component declarations */ virtual std::vector component_declarations() const = 0; + /** Method to get the name of the tensor argument. * * @return the name of the tensor argument @@ -633,6 +662,7 @@ public: { return _basename; } + /** Method to get the tensor format * * @return the format @@ -643,8 +673,8 @@ public: } protected: - TensorInfo _format { }; - std::string _basename {}; + TensorInfo _format{}; + std::string _basename{}; }; enum class GpuTensorStorage : int32_t @@ -661,6 +691,7 @@ class IGpuTensorArgument : public ITensorArgument { public: virtual ~IGpuTensorArgument() = default; + /** Method to get the tensor storage, which is the underlying storage used to keep the data memory * * @param[in] x tensor storage to query @@ -668,6 +699,7 @@ public: * @return the tensor storage as a string */ virtual std::string storage(GpuTensorStorage x) = 0; + /** Method to get the tensor storage type declaration as a string * * @param[in] x tensor component to query @@ -675,6 +707,7 @@ public: * @return the tensor storage type declaration as a string */ virtual std::string storage_type_declaration(GpuTensorStorage x) const = 0; + /** Method to get the tensor storage declarations * * @return a vector containing the tensor storage declarations @@ -685,10 +718,10 @@ public: class ClTensorArgument : public IGpuTensorArgument { public: - ClTensorArgument(const std::string& name, const TensorInfo& x, bool return_by_value_when_possible) + ClTensorArgument(const std::string &name, const TensorInfo &x, bool return_by_value_when_possible) { - _basename = name; - _format = x; + _basename = name; + _format = x; _return_by_value_when_possible = return_by_value_when_possible; } @@ -714,12 +747,12 @@ public: switch(x) { case TensorComponent::Dim1xDim2: - return std::to_string(_format.shape[1] * _format.shape[2]); + return std::to_string(_format.shape[1] * _format.shape[2]); case TensorComponent::Dim1xDim2xDim3: - return std::to_string(_format.shape[1] * _format.shape[2] * _format.shape[2]); + return std::to_string(_format.shape[1] * _format.shape[2] * _format.shape[2]); default: - std::cout << "Unsupported folded dimension" << std::endl; - assert(false); + std::cout << "Unsupported folded dimension" << std::endl; + assert(false); } } } @@ -840,9 +873,9 @@ private: return var_name; } - bool _return_by_value_when_possible { false }; - std::vector _storage_required {}; - std::vector _components_required {}; + bool 
_return_by_value_when_possible{ false }; + std::vector _storage_required{}; + std::vector _components_required{}; }; /** @@ -858,32 +891,33 @@ private: class GpuTileRegistry { public: -enum class RegistryTileType -{ - Tile, - Link -}; + enum class RegistryTileType + { + Tile, + Link + }; -using RegistryIdSpace = int32_t; -using RegistryLevel = int32_t; -using RegistryTileName = std::string; + using RegistryIdSpace = int32_t; + using RegistryLevel = int32_t; + using RegistryTileName = std::string; -struct RegistryTileTableEntry -{ - RegistryLevel registry_level { 0 }; - std::unique_ptr tile_object { nullptr }; -}; + struct RegistryTileTableEntry + { + RegistryLevel registry_level{ 0 }; + std::unique_ptr tile_object{ nullptr }; + }; -struct RegistryTileTypeTableEntry -{ - RegistryTileType tile_type { RegistryTileType::Tile }; - RegistryTileName tile_name {}; - RegistryIdSpace registry_idspace { 0 }; - RegistryLevel registry_level { 0 }; -}; + struct RegistryTileTypeTableEntry + { + RegistryTileType tile_type{ RegistryTileType::Tile }; + RegistryTileName tile_name{}; + RegistryIdSpace registry_idspace{ 0 }; + RegistryLevel registry_level{ 0 }; + }; + + using RegistryTileTable = std::map>; + using RegistryTileTypeTable = std::map>; -using RegistryTileTable = std::map>; -using RegistryTileTypeTable = std::map>; /** * @brief Construct a new Gpu Tile Registry object * @@ -892,6 +926,7 @@ using RegistryTileTypeTable = std::mapfirst); @@ -945,16 +984,17 @@ using RegistryTileTypeTable = std::mapname(); - TileInfo format = frag->format(); + const int32_t key_IdSpace = _IdSpace; + const std::string key_var_name = name; + const std::string var_name = frag->name(); + TileInfo format = frag->format(); // First check whether a tile with the same name exists IVectorTile *result = (*this)[key_var_name]; @@ -972,6 +1012,7 @@ using RegistryTileTypeTable = std::map tile = std::make_unique(var_name, format); + std::unique_ptr tile = std::make_unique(var_name, format); _frags[key_IdSpace][key_var_name].tile_object = std::move(tile); _frags[key_IdSpace][key_var_name].registry_level = _registry_level; @@ -1002,6 +1043,7 @@ using RegistryTileTypeTable = std::map>& in, DataType dt) + void insert(const std::string &name, const std::vector> &in, DataType dt) { assert(_language == GpuTargetLanguage::OpenCL); const int32_t key_IdSpace = _IdSpace; @@ -1023,7 +1065,7 @@ using RegistryTileTypeTable = std::map tile = std::make_unique(in, dt); + std::unique_ptr tile = std::make_unique(in, dt); _frags[key_IdSpace][key_var_name].tile_object = std::move(tile); _frags[key_IdSpace][key_var_name].registry_level = _registry_level; @@ -1033,6 +1075,7 @@ using RegistryTileTypeTable = std::map>& in, DataType dt) + IVectorTile *insert(const std::vector> &in, DataType dt) { assert(_language == GpuTargetLanguage::OpenCL); - const int32_t key_IdSpace = _IdSpace; - const std::string key_var_name = "_" + std::to_string(_anonymous_frag_count++); + const int32_t key_IdSpace = _IdSpace; + const std::string key_var_name = "_" + std::to_string(_anonymous_frag_count++); // First check whether a tile with the same name exists IVectorTile *result = (*this)[key_var_name]; assert(result == nullptr); if(result == nullptr) { - std::unique_ptr tile = std::make_unique(in, dt); + std::unique_ptr tile = std::make_unique(in, dt); _frags[key_IdSpace][key_var_name].tile_object = std::move(tile); _frags[key_IdSpace][key_var_name].registry_level = _registry_level; @@ -1067,6 +1110,7 @@ using RegistryTileTypeTable = std::map A vector with all the declared tiles 
in the IdSpace provided by the user */ - std::vector tile_declarations(int32_t IdSpace) + std::vector tile_declarations(int32_t IdSpace) { - std::vector tiles; + std::vector tiles; std::map::iterator it = _frag_types[IdSpace].begin(); - while (it != _frag_types[IdSpace].end()) + while(it != _frag_types[IdSpace].end()) { // The following line should be enabled. However, we cannot at this stage // because it used to retrieve the output tile produced by each component. @@ -1163,6 +1211,7 @@ using RegistryTileTypeTable = std::map::iterator it = _frags[_IdSpace].begin(); - while (it != _frags[_IdSpace].end()) + while(it != _frags[_IdSpace].end()) { - if (it->second.registry_level == _registry_level) + if(it->second.registry_level == _registry_level) { it = _frags[_IdSpace].erase(it); } @@ -1196,9 +1246,9 @@ using RegistryTileTypeTable = std::map::iterator it_type = _frag_types[_IdSpace].begin(); - while (it_type != _frag_types[_IdSpace].end()) + while(it_type != _frag_types[_IdSpace].end()) { - if (it_type->second.registry_level == _registry_level) + if(it_type->second.registry_level == _registry_level) { it_type = _frag_types[_IdSpace].erase(it_type); } @@ -1210,6 +1260,7 @@ using RegistryTileTypeTable = std::map= 0 ); + assert(_IdSpace >= 0); if(_registry_level == 0) { return "_G" + std::to_string(_IdSpace) + "_" + name; @@ -1233,12 +1284,13 @@ private: return name; } } - RegistryTileTable _frags {}; - RegistryTileTypeTable _frag_types {}; - RegistryLevel _registry_level { 0 }; - RegistryIdSpace _IdSpace { -1 }; - int32_t _anonymous_frag_count { 0 }; // Counter used to create the anonymous tiles - GpuTargetLanguage _language { GpuTargetLanguage::Unknown }; // Gpu programming language + + RegistryTileTable _frags{}; + RegistryTileTypeTable _frag_types{}; + RegistryLevel _registry_level{ 0 }; + RegistryIdSpace _IdSpace{ -1 }; + int32_t _anonymous_frag_count{ 0 }; // Counter used to create the anonymous tiles + GpuTargetLanguage _language{ GpuTargetLanguage::Unknown }; // Gpu programming language }; using TensorEntry = std::unique_ptr; @@ -1260,6 +1312,7 @@ public: { _language = GpuTargetLanguage::Unknown; } + /** * @brief Construct a new Gpu Tensor Registry object * @@ -1269,11 +1322,13 @@ public: { _language = language; } + /** * @brief Default destructor. Destroy the Gpu Tensor Registry object * */ ~GpuTensorArgumentRegistry() = default; + /** * @brief Set the working IdSpace for the tensor registry. IdSpace is used to prevent name collisions when declaring tensors. * Therefore, the IdSpace should be set before declaring any tensors. @@ -1284,6 +1339,7 @@ public: { _IdSpace = id; } + /** * @brief Get the current working IdSpace for the tensor registry. IdSpace is used to prevent name collisions when declaring tensors * @@ -1293,6 +1349,7 @@ public: { return _IdSpace; } + /** * @brief Gets all the IdSpace declarations defined in the tensor registry. * @@ -1304,7 +1361,7 @@ public: auto it = _refs.begin(); - while (it != _refs.end()) + while(it != _refs.end()) { x.push_back(it->first); @@ -1313,6 +1370,7 @@ public: return x; } + /** * @brief Declare a tensor with TensorInfo. 
The tensor will be stored in the IdSpace set with @p set_IdSpace() * @@ -1322,13 +1380,13 @@ public: * @param[in] x Pair of tensor info and tensor id * @param[in] return_by_value_when_possible True if we want the value stored in the tensor components */ - void insert(const std::string& name, const TensorInfo& x, bool return_by_value_when_possible) + void insert(const std::string &name, const TensorInfo &x, bool return_by_value_when_possible) { assert(_language == GpuTargetLanguage::OpenCL); - const int32_t key_IdSpace = _IdSpace; - const int32_t tensor_id = x.id; - const std::string key_var_name = name; - const std::string var_name = generate_tensor_name(name, tensor_id); + const int32_t key_IdSpace = _IdSpace; + const int32_t tensor_id = x.id; + const std::string key_var_name = name; + const std::string var_name = generate_tensor_name(name, tensor_id); // First, check whether the tensor has already a reference. If so, trigger an assert assert(!has_tensor_argument(name)); @@ -1338,12 +1396,14 @@ public: if(result == _tensor_arguments.end()) { // It means that we haven't added a tensor with that tensor_id yet. Create a IGpuTensorArgument before creating the reference - std::unique_ptr arg = std::make_unique(var_name, x, return_by_value_when_possible); - _tensor_arguments[tensor_id] = std::move(arg); + std::unique_ptr arg = std::make_unique(var_name, x, + return_by_value_when_possible); + _tensor_arguments[tensor_id] = std::move(arg); } _refs[key_IdSpace][key_var_name] = tensor_id; } + /** * @brief Get the tensor from the registry. This method searches the tensor in the IdSpace set with @p set_IdSpace() * @@ -1351,21 +1411,21 @@ public: * * @return IGpuTensor* The tensor */ - IGpuTensorArgument* operator[](const std::string& name) + IGpuTensorArgument *operator[](const std::string &name) { - const int32_t key_IdSpace = _IdSpace; - const std::string key_var_name = name; + const int32_t key_IdSpace = _IdSpace; + const std::string key_var_name = name; - IGpuTensorArgument* result = nullptr; - auto search_IdSpace = _refs.find(key_IdSpace); + IGpuTensorArgument *result = nullptr; + auto search_IdSpace = _refs.find(key_IdSpace); if(search_IdSpace != _refs.end()) { auto search_tensor_id = _refs[key_IdSpace].find(key_var_name); if(search_tensor_id != _refs[key_IdSpace].end()) { - const int32_t tensor_id = search_tensor_id->second; - auto search_tensor_argument = _tensor_arguments.find(tensor_id); + const int32_t tensor_id = search_tensor_id->second; + auto search_tensor_argument = _tensor_arguments.find(tensor_id); if(search_tensor_argument != _tensor_arguments.end()) { result = search_tensor_argument->second.get(); @@ -1376,18 +1436,19 @@ public: return result; } + /** * @brief Get all the tensors declared in the IdSpace provided by the user * * @return std::vector A vector with all the declared tensors */ - std::vector tensor_argument_declarations() + std::vector tensor_argument_declarations() { - std::vector args; + std::vector args; auto it = _tensor_arguments.begin(); - while (it != _tensor_arguments.end()) + while(it != _tensor_arguments.end()) { args.push_back(it->second.get()); it++; @@ -1395,6 +1456,7 @@ public: return args; } + /** * @brief Check whether the tensor argument in the IdSpace set with @p set_IdSpace() exists * @@ -1403,10 +1465,10 @@ public: * @return true if the tensor argument exists * @return false if the tensor argument does not exist */ - bool has_tensor_argument(const std::string& name) + bool has_tensor_argument(const std::string &name) { - const int32_t key_IdSpace = 
_IdSpace; - const std::string key_var_name = name; + const int32_t key_IdSpace = _IdSpace; + const std::string key_var_name = name; auto search_IdSpace = _refs.find(key_IdSpace); @@ -1421,6 +1483,7 @@ public: return false; } } + /** * @brief Check whether the tensor argument is in the the IdSpace provided by the user * @@ -1430,10 +1493,10 @@ public: * @return true if the tile exists * @return false if the tile does not exist */ - bool has_tensor_argument(const std::string& name, int32_t IdSpace) + bool has_tensor_argument(const std::string &name, int32_t IdSpace) { - const int32_t key_IdSpace = IdSpace; - const std::string key_var_name = name; + const int32_t key_IdSpace = IdSpace; + const std::string key_var_name = name; auto search_IdSpace = _refs.find(key_IdSpace); @@ -1448,19 +1511,20 @@ public: return false; } } + private: // This method ensures that the key is unique among different components - std::string generate_tensor_name(const std::string& name, int32_t tensor_id) + std::string generate_tensor_name(const std::string &name, int32_t tensor_id) { - assert(tensor_id >= 0 ); + assert(tensor_id >= 0); return name + std::to_string(tensor_id); } - std::map _tensor_arguments {}; - std::map> _refs {}; - int32_t _IdSpace { -1 }; - GpuTargetLanguage _language { GpuTargetLanguage::Unknown }; // Gpu programming language + std::map _tensor_arguments{}; + std::map> _refs{}; + int32_t _IdSpace{ -1 }; + GpuTargetLanguage _language{ GpuTargetLanguage::Unknown }; // Gpu programming language }; enum class OpType : int32_t @@ -1587,11 +1651,19 @@ enum class OperandType : int32_t struct ScalarTileCoord { - ScalarTileCoord() {} - ScalarTileCoord(int32_t x0, int32_t y0) : x(x0), y(y0) {} - int32_t x { -1 }; - int32_t y { -1 }; + ScalarTileCoord() + { + } + + ScalarTileCoord(int32_t x0, int32_t y0) + : x(x0), y(y0) + { + } + + int32_t x{ -1 }; + int32_t y{ -1 }; }; + /** * @brief Operand class. This object is used to pass the operands to the operations performed by the writer. 
* Operand can be of three types: @@ -1609,7 +1681,7 @@ public: _type = OperandType::Tile; } - Operand(const std::string &val, const ScalarTileCoord& coord) + Operand(const std::string &val, const ScalarTileCoord &coord) { _str = val; _type = OperandType::ScalarTile; @@ -1622,13 +1694,13 @@ public: _type = type; } - Operand(const Operand& t) + Operand(const Operand &t) { _str = t.value(); _type = t.type(); } - Operand& operator=(const Operand& t) + Operand &operator=(const Operand &t) { _str = t.value(); _type = t.type(); @@ -1652,9 +1724,9 @@ public: } private: - std::string _str {}; - OperandType _type { OperandType::Unknown }; - ScalarTileCoord _coord {}; + std::string _str{}; + OperandType _type{ OperandType::Unknown }; + ScalarTileCoord _coord{}; }; enum class GpuSamplerTensorStorage : int32_t @@ -1670,14 +1742,17 @@ enum class GpuSamplerTensorStorage : int32_t struct GpuSampler { GpuSampler() = default; - TensorSamplerFormat format { TensorSamplerFormat::Unknown }; - GpuSamplerTensorStorage storage { GpuSamplerTensorStorage::Unknown }; - TensorSamplerAddressModeX address_mode_x { TensorSamplerAddressModeX::Unknown }; - TensorSamplerAddressModeY address_mode_y { TensorSamplerAddressModeY::Unknown }; - TensorSamplerAddressModeZ address_mode_z { TensorSamplerAddressModeZ::Unknown }; + + TensorSamplerFormat format{ TensorSamplerFormat::Unknown }; + GpuSamplerTensorStorage storage{ GpuSamplerTensorStorage::Unknown }; + TensorSamplerAddressModeX address_mode_x{ TensorSamplerAddressModeX::Unknown }; + TensorSamplerAddressModeY address_mode_y{ TensorSamplerAddressModeY::Unknown }; + TensorSamplerAddressModeZ address_mode_z{ TensorSamplerAddressModeZ::Unknown }; }; -inline GpuSampler create_simple_sampler(const TensorInfo* tensor_info_id, GpuSampler sampler, int32_t step_x, int32_t step_y, int32_t step_z) +inline GpuSampler +create_simple_sampler(const TensorInfo *tensor_info_id, GpuSampler sampler, int32_t step_x, int32_t step_y, + int32_t step_z) { CKW_UNUSED(step_x, step_y, step_z); @@ -1697,19 +1772,19 @@ inline GpuSampler create_simple_sampler(const TensorInfo* tensor_info_id, GpuSam switch(sampler.format) { case TensorSamplerFormat::C_W_H: - dim_x = tensor[0]; - dim_y = tensor[1]; - dim_z = tensor[2]; - break; + dim_x = tensor[0]; + dim_y = tensor[1]; + dim_z = tensor[2]; + break; case TensorSamplerFormat::C_WH_1: - dim_x = tensor[0]; - dim_y = tensor[1] * tensor[2]; - dim_z = 1; - break; + dim_x = tensor[0]; + dim_y = tensor[1] * tensor[2]; + dim_z = 1; + break; default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - break; + std::cout << "Unsupported tensor format" << std::endl; + assert(false); + break; } if(dim_x == 1) @@ -1737,6 +1812,7 @@ class GpuOutputSampler { public: GpuOutputSampler() = default; + /** * @brief Method used to initialize the GpuOutputSampler. The GpuOutputSampler can be initialized only once * by the root component. Once initialized, all simpler components will need to used this sampler @@ -1747,7 +1823,8 @@ public: * @param[in] step_y Increment step in the Y direction. Not necessarily it is the same of m0 of tile! * @param[in] step_z Increment step in the Z direction. Not necessarily it is the same of d0 of tile! 
*/ - void initialize(const TensorInfo *tensor_info_id, GpuSamplerTensorStorage tensor_storage, TensorSamplerFormat tensor_format, int32_t step_x, int32_t step_y, int32_t step_z) + void initialize(const TensorInfo *tensor_info_id, GpuSamplerTensorStorage tensor_storage, + TensorSamplerFormat tensor_format, int32_t step_x, int32_t step_y, int32_t step_z) { assert(_is_initialized == false); @@ -1778,6 +1855,7 @@ public: { return _step_z; }; + private: GpuSampler create_sampler(GpuSamplerTensorStorage tensor_storage, TensorSamplerFormat tensor_format) { @@ -1804,19 +1882,19 @@ private: switch(tensor_format) { case TensorSamplerFormat::C_W_H: - dim_x = tensor[0]; - dim_y = tensor[1]; - dim_z = tensor[2]; - break; + dim_x = tensor[0]; + dim_y = tensor[1]; + dim_z = tensor[2]; + break; case TensorSamplerFormat::C_WH_1: - dim_x = tensor[0]; - dim_y = tensor[1] * tensor[2]; - dim_z = 1; - break; + dim_x = tensor[0]; + dim_y = tensor[1] * tensor[2]; + dim_z = 1; + break; default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - break; + std::cout << "Unsupported tensor format" << std::endl; + assert(false); + break; } if((dim_x % _step_x) != 0 && dim_x != 1) @@ -1837,12 +1915,13 @@ private: return sampler; } - GpuSampler _sampler { }; // GpuSampler - int32_t _step_x { 1 }; - int32_t _step_y { 1 }; - int32_t _step_z { 1 }; - const TensorInfo* _tensor_info_id { nullptr }; - bool _is_initialized { false }; + + GpuSampler _sampler{}; // GpuSampler + int32_t _step_x{ 1 }; + int32_t _step_y{ 1 }; + int32_t _step_z{ 1 }; + const TensorInfo *_tensor_info_id{ nullptr }; + bool _is_initialized{ false }; }; /** @@ -1851,11 +1930,12 @@ private: class TensorOperand { public: - TensorOperand(const std::string &val, GpuSampler sampler) : _str(val), _sampler(sampler) + TensorOperand(const std::string &val, GpuSampler sampler) + : _str(val), _sampler(sampler) { } - TensorOperand& operator=(const TensorOperand& t) + TensorOperand &operator=(const TensorOperand &t) { _str = t.value(); _sampler = t.sampler(); @@ -1873,8 +1953,8 @@ public: } private: - std::string _str {}; - GpuSampler _sampler {}; + std::string _str{}; + GpuSampler _sampler{}; }; /** @@ -1892,9 +1972,11 @@ public: * * @param[in] language Gpu programming language to use */ - GpuKernelWriterDataHolder(GpuTargetLanguage language) : tiles(language), arguments(language), code(""), _language(language) + GpuKernelWriterDataHolder(GpuTargetLanguage language) + : tiles(language), arguments(language), code(""), _language(language) { } + /** * @brief Get the Gpu programming language used * @@ -1904,6 +1986,7 @@ public: { return _language; } + /** * @brief @ref GpuTileRegistry * @@ -1932,9 +2015,9 @@ private: struct LWS { - int32_t x {1}; - int32_t y {1}; - int32_t z {1}; + int32_t x{ 1 }; + int32_t y{ 1 }; + int32_t z{ 1 }; }; /** @@ -1944,7 +2027,8 @@ struct LWS class OperandUnpacker { public: - OperandUnpacker(GpuTileRegistry& tiles, GpuTensorArgumentRegistry& arguments) : _tiles(tiles), _arguments(arguments) + OperandUnpacker(GpuTileRegistry &tiles, GpuTensorArgumentRegistry &arguments) + : _tiles(tiles), _arguments(arguments) { // Increase the level of the stack to allocate possible temporary tiles _tiles.increment_registry_level(); @@ -1956,7 +2040,7 @@ public: _tiles.decrement_registry_level(); } - IVectorTile* unpack(const Operand& src) + IVectorTile *unpack(const Operand &src) { // Get the tile if(src.type() == OperandType::Tile) @@ -1974,21 +2058,21 @@ public: assert(coord.x >= 0); assert(coord.y >= 0); auto val = 
_tiles[src.value()]->scalar(coord.x, coord.y); - return _tiles.insert({{{val.str}}}, val.type.dt); + return _tiles.insert({ { { val.str } } }, val.type.dt); } else { - return _tiles.insert({{{src.value()}}}, to_tile_data_type(src.type())); + return _tiles.insert({ { { src.value() } } }, to_tile_data_type(src.type())); } } // Create an anonymous tile with the tensor component else { assert(_arguments.has_tensor_argument(src.value())); - auto x = _arguments[src.value()]; + auto x = _arguments[src.value()]; const std::string val = x->component(to_tensor_component(src.type())); const DataType dt = x->component_data_type(); - return _tiles.insert({{{val}}}, dt); + return _tiles.insert({ { { val } } }, dt); } } @@ -2003,37 +2087,37 @@ private: switch(x) { case OperandType::TensorDim0: - return TensorComponent::Dim0; + return TensorComponent::Dim0; case OperandType::TensorDim1: - return TensorComponent::Dim1; + return TensorComponent::Dim1; case OperandType::TensorDim2: - return TensorComponent::Dim2; + return TensorComponent::Dim2; case OperandType::TensorDim3: - return TensorComponent::Dim3; + return TensorComponent::Dim3; case OperandType::TensorDim4: - return TensorComponent::Dim4; + return TensorComponent::Dim4; case OperandType::TensorStride1: - return TensorComponent::Stride1; + return TensorComponent::Stride1; case OperandType::TensorStride2: - return TensorComponent::Stride2; + return TensorComponent::Stride2; case OperandType::TensorStride3: - return TensorComponent::Stride3; + return TensorComponent::Stride3; case OperandType::TensorStride4: - return TensorComponent::Stride4; + return TensorComponent::Stride4; case OperandType::TensorDim1xDim2: - return TensorComponent::Dim1xDim2; + return TensorComponent::Dim1xDim2; case OperandType::TensorDim1xDim2xDim3: - return TensorComponent::Dim1xDim2xDim3; + return TensorComponent::Dim1xDim2xDim3; case OperandType::TensorDataOffset: - return TensorComponent::OffsetFirstElement; + return TensorComponent::OffsetFirstElement; default: - assert(false); - return TensorComponent::Unknown; + assert(false); + return TensorComponent::Unknown; } } - GpuTileRegistry& _tiles; - GpuTensorArgumentRegistry& _arguments; + GpuTileRegistry &_tiles; + GpuTensorArgumentRegistry &_arguments; }; /** @@ -2044,18 +2128,17 @@ private: class TensorOperandUnpacker { public: - TensorOperandUnpacker(GpuTensorArgumentRegistry& arguments) : _arguments(arguments) - { - }; + TensorOperandUnpacker(GpuTensorArgumentRegistry &arguments) + : _arguments(arguments){}; - IGpuTensorArgument* unpack(const TensorOperand& src) + IGpuTensorArgument *unpack(const TensorOperand &src) { assert(_arguments.has_tensor_argument(src.value())); return _arguments[src.value()]; } private: - GpuTensorArgumentRegistry& _arguments; + GpuTensorArgumentRegistry &_arguments; }; /** @@ -2067,19 +2150,19 @@ private: struct GpuKernel { // Compilation stage - std::string code {}; // Source code, required for the compilation stage - std::vector list_extensions{}; // Extensions, required for the compilation stage + std::string code{}; // Source code, required for the compilation stage + std::vector list_extensions{}; // Extensions, required for the compilation stage // Tuning stage - std::string config_id {}; // Unique id, required for the tuning stage - std::vector list_lws{}; // LWS to test, required for the tuning stage + std::string config_id{}; // Unique id, required for the tuning stage + std::vector list_lws{}; // LWS to test, required for the tuning stage // Dispatch stage - GpuOutputSampler 
output_sampler{}; // GpuOutputSampler, required for the dispatch stage - std::vector> list_tensor_storages; // List of tensor storages, required for the dispatch stage - std::vector> list_tensor_components;// List of tensor components (width, stride,..), required for the dispatch stage) + GpuOutputSampler output_sampler{}; // GpuOutputSampler, required for the dispatch stage + std::vector> list_tensor_storages; // List of tensor storages, required for the dispatch stage + std::vector> list_tensor_components; // List of tensor components (width, stride,..), required for the dispatch stage) }; // This function should produce an object with the source -inline std::string generate_code(GpuKernelWriterDataHolder &in, const std::string& name) +inline std::string generate_code(GpuKernelWriterDataHolder &in, const std::string &name) { std::string code; code += "__kernel void "; @@ -2142,9 +2225,8 @@ inline std::string generate_code(GpuKernelWriterDataHolder &in, const std::strin class GpuTensor3dMapper { public: - GpuTensor3dMapper(IGpuTensorArgument* tensor, GpuSampler sampler) : _sampler(sampler), _tensor(tensor) - { - }; + GpuTensor3dMapper(IGpuTensorArgument *tensor, GpuSampler sampler) + : _sampler(sampler), _tensor(tensor){}; std::string tensor_component_x() const { @@ -2241,7 +2323,7 @@ public: bool is_one_component_x() const { - auto t = _tensor->format(); + auto t = _tensor->format(); const auto format = _sampler.format; switch(format) { @@ -2257,7 +2339,7 @@ public: bool is_one_component_y() const { - auto t = _tensor->format(); + auto t = _tensor->format(); const auto format = _sampler.format; switch(format) { @@ -2274,7 +2356,7 @@ public: bool is_one_component_z() const { - auto t = _tensor->format(); + auto t = _tensor->format(); const auto format = _sampler.format; switch(format) { @@ -2291,7 +2373,7 @@ public: bool is_one_component_batch() const { - auto t = _tensor->format(); + auto t = _tensor->format(); const auto format = _sampler.format; switch(format) { @@ -2310,19 +2392,19 @@ public: return _sampler; } - IGpuTensorArgument* tensor_argument() const + IGpuTensorArgument *tensor_argument() const { return _tensor; } private: GpuSampler _sampler; - IGpuTensorArgument* _tensor; + IGpuTensorArgument *_tensor; }; struct GpuKernelWriterAttribute { - bool return_tensor_component_by_value { false }; + bool return_tensor_component_by_value{ false }; }; enum class ConvertPolicy @@ -2345,35 +2427,58 @@ class IGpuKernelWriter { public: virtual ~IGpuKernelWriter() = default; + virtual void set_IdSpace(int32_t id) = 0; - virtual void import_tile(const std::string& dst, const IVectorTile *src) = 0; - virtual void declare_argument(const std::string& name, const TensorInfo& tensor) = 0; - virtual void declare_tile(const std::string& name, const TileInfo& info) = 0; - virtual void declare_const_tile(const std::string& name, const std::vector>& in, DataType dt) = 0; - virtual void write_text(const std::string& x) = 0; + + virtual void import_tile(const std::string &dst, const IVectorTile *src) = 0; + + virtual void declare_argument(const std::string &name, const TensorInfo &tensor) = 0; + + virtual void declare_tile(const std::string &name, const TileInfo &info) = 0; + + virtual void declare_const_tile(const std::string &name, const std::vector> &in, DataType dt) = 0; + + virtual void write_text(const std::string &x) = 0; + virtual void compound_statement_begin() = 0; + virtual void compound_statement_end() = 0; // Operations - virtual void op_get_global_id(const Operand& dst_var, int32_t dim) = 
0; - virtual void op_get_global_coord(const Operand& dst, const Operand& step, const TensorOperand& tensor, int32_t dim) = 0; - virtual void op_get_global_batch(const Operand& dst, const TensorOperand& tensor) = 0; - virtual void op_get_global_size(const Operand& dst_var, int32_t dim) = 0; - virtual void op_binary_expression(const Operand& dst, const Operand &lhs, BinaryOp op, const Operand &rhs) = 0; - virtual void op_assign(const Operand& dst_name, const Operand& src_name) = 0; - virtual void op_scalar_function(const Operand& dst_name, const Operand& src_name, ScalarUnaryFunction func) = 0; - virtual void op_if(const Operand& lhs, BinaryOp op, const Operand& rhs) = 0; - virtual void op_for_loop(const Operand& var_name, BinaryOp cond_op, const Operand& cond_value, AssignmentOp update_op, const Operand& update_value) = 0; - virtual void op_load_indirect(const TensorOperand& tensor, const Operand& dst, const Operand& x, const Operand& y_indirect, const Operand& z, const Operand& b = Operand("0", OperandType::ScalarInt32)) = 0; - virtual void op_load_immediate(const TensorOperand& tensor, const Operand& dst, const Operand& x, const Operand& y, const Operand& z, const Operand& b = Operand("0", OperandType::ScalarInt32), const Operand& dilation_y = Operand("1", OperandType::ScalarInt32)) = 0; - virtual void op_store_immediate(const TensorOperand& tensor, const Operand& src, const Operand& x, const Operand& y, const Operand& z, const Operand& b = Operand("0", OperandType::ScalarInt32)) = 0; - virtual void op_cast_expression(const Operand& dst, const Operand &src, ConvertPolicy policy) = 0; + virtual void op_get_global_id(const Operand &dst_var, int32_t dim) = 0; + + virtual void op_get_global_coord(const Operand &dst, const Operand &step, const TensorOperand &tensor, int32_t dim) = 0; + + virtual void op_get_global_batch(const Operand &dst, const TensorOperand &tensor) = 0; + + virtual void op_get_global_size(const Operand &dst_var, int32_t dim) = 0; + + virtual void op_binary_expression(const Operand &dst, const Operand &lhs, BinaryOp op, const Operand &rhs) = 0; + + virtual void op_assign(const Operand &dst_name, const Operand &src_name) = 0; + + virtual void op_scalar_function(const Operand &dst_name, const Operand &src_name, ScalarUnaryFunction func) = 0; + + virtual void op_if(const Operand &lhs, BinaryOp op, const Operand &rhs) = 0; + + virtual void op_for_loop(const Operand &var_name, BinaryOp cond_op, const Operand &cond_value, AssignmentOp update_op, const Operand &update_value) = 0; + + virtual void op_load_indirect(const TensorOperand &tensor, const Operand &dst, const Operand &x, const Operand &y_indirect, const Operand &z, const Operand &b = Operand("0", OperandType::ScalarInt32)) = 0; + + virtual void op_load_immediate(const TensorOperand &tensor, const Operand &dst, const Operand &x, const Operand &y, const Operand &z, const Operand &b = Operand("0", OperandType::ScalarInt32), const Operand &dilation_y = Operand("1", OperandType::ScalarInt32)) = 0; + + virtual void op_store_immediate(const TensorOperand &tensor, const Operand &src, const Operand &x, const Operand &y, const Operand &z, const Operand &b = Operand("0", OperandType::ScalarInt32)) = 0; + + virtual void op_cast_expression(const Operand &dst, const Operand &src, ConvertPolicy policy) = 0; + virtual void op_return() = 0; + // virtual void op_else() = 0; // virtual void op_elseif() = 0; // Utils // It is the process of converting - virtual void util_get_indirect_buffer(const Operand& dst, const TensorOperand& tensor, 
const Operand& x, const Operand& y, const Operand& x_off, const Operand& y_off) = 0; + virtual void util_get_indirect_buffer(const Operand &dst, const TensorOperand &tensor, const Operand &x, + const Operand &y, const Operand &x_off, const Operand &y_off) = 0; }; enum class GpuLoadStoreType @@ -2385,15 +2490,25 @@ enum class GpuLoadStoreType class IGpuLoadStoreHelperWriter { public: - IGpuLoadStoreHelperWriter(IGpuKernelWriter *x, GpuTensor3dMapper mapper, GpuLoadStoreType type) : _writer(x), _mapper(mapper), _type(type) {} + IGpuLoadStoreHelperWriter(IGpuKernelWriter *x, GpuTensor3dMapper mapper, GpuLoadStoreType type) + : _writer(x), _mapper(mapper), _type(type) + { + } + IGpuLoadStoreHelperWriter(const IGpuLoadStoreHelperWriter &) = default; + IGpuLoadStoreHelperWriter &operator=(const IGpuLoadStoreHelperWriter &) = default; + virtual ~IGpuLoadStoreHelperWriter() = default; + virtual void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) = 0; - virtual void write(const std::pair& y) = 0; + + virtual void write(const std::pair &y) = 0; + virtual void finalize() = 0; + protected: - IGpuKernelWriter* _writer; + IGpuKernelWriter *_writer; GpuTensor3dMapper _mapper; GpuLoadStoreType _type; }; @@ -2401,14 +2516,17 @@ protected: class ClLoadStoreBufferHelperWriter : public IGpuLoadStoreHelperWriter { public: - ClLoadStoreBufferHelperWriter(IGpuKernelWriter *x, const GpuTensor3dMapper& mapper, GpuLoadStoreType type) : IGpuLoadStoreHelperWriter(x, mapper, type) + ClLoadStoreBufferHelperWriter(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type) + : IGpuLoadStoreHelperWriter(x, mapper, type) { } ClLoadStoreBufferHelperWriter(const ClLoadStoreBufferHelperWriter &) = default; + ClLoadStoreBufferHelperWriter &operator=(const ClLoadStoreBufferHelperWriter &) = default; - static bool validate(IGpuKernelWriter *x, GpuTensor3dMapper mapper, GpuLoadStoreType type, IVectorTile *dst) + static bool + validate(IGpuKernelWriter *x, GpuTensor3dMapper mapper, GpuLoadStoreType type, IVectorTile *dst) { CKW_UNUSED(x, type, dst); @@ -2426,9 +2544,9 @@ public: _dst = dst; _ls_width_full = dst->format().w; - _coord_x = x->scalar(0, 0).str; - _coord_z = z->scalar(0, 0).str; - _coord_b = b->scalar(0, 0).str; + _coord_x = x->scalar(0, 0).str; + _coord_z = z->scalar(0, 0).str; + _coord_b = b->scalar(0, 0).str; _coord_orig_z = _coord_z; out_of_bound_initialize_x(_coord_x); @@ -2478,7 +2596,7 @@ public: */ } - void write(const std::pair& y) override + void write(const std::pair &y) override { int32_t idx_y = y.first; std::string coord_y = y.second; @@ -2517,17 +2635,18 @@ public: out_of_bound_finalize_z(); out_of_bound_finalize_x(); } + private: - IVectorTile* _dst { nullptr }; - int32_t _ls_width_full { 0 }; - std::vector _ls_width_part { }; - std::vector, std::string>> _leftovers_x {}; - std::string _coord_x {}; - std::string _coord_z {}; - std::string _coord_orig_z {}; - std::string _coord_b {}; + IVectorTile *_dst{ nullptr }; + int32_t _ls_width_full{ 0 }; + std::vector _ls_width_part{}; + std::vector, std::string>> _leftovers_x{}; + std::string _coord_x{}; + std::string _coord_z{}; + std::string _coord_orig_z{}; + std::string _coord_b{}; - void out_of_bound_initialize_x(std::string& coord) + void out_of_bound_initialize_x(std::string &coord) { if(_mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin) { @@ -2567,7 +2686,7 @@ private: } }; - void out_of_bound_initialize_y(std::string& coord) + void out_of_bound_initialize_y(std::string 
&coord) { std::string max = ""; @@ -2577,45 +2696,45 @@ private: { case TensorSamplerAddressModeY::Skip: case TensorSamplerAddressModeY::ClampToBorder: - // NOTE: This line should not be moved outside of the switch statement. - // The reason for that is because when we query the component, the component is marked as used - // and added to the list of arguments of the kernel. Since, not in all cases this component is required, - // we should request the component only when used - max = _mapper.tensor_component_y(); - _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n"); - _writer->compound_statement_begin(); - break; + // NOTE: This line should not be moved outside of the switch statement. + // The reason for that is because when we query the component, the component is marked as used + // and added to the list of arguments of the kernel. Since, not in all cases this component is required, + // we should request the component only when used + max = _mapper.tensor_component_y(); + _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n"); + _writer->compound_statement_begin(); + break; case TensorSamplerAddressModeY::SkipMinEdgeOnly: case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly: - _writer->write_text("if(" + coord + " >= 0)\n"); - _writer->compound_statement_begin(); - break; + _writer->write_text("if(" + coord + " >= 0)\n"); + _writer->compound_statement_begin(); + break; case TensorSamplerAddressModeY::SkipMaxEdgeOnly: case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly: - max = _mapper.tensor_component_y(); - _writer->write_text("if(" + coord + " < " + max + ")\n"); - _writer->compound_statement_begin(); - break; + max = _mapper.tensor_component_y(); + _writer->write_text("if(" + coord + " < " + max + ")\n"); + _writer->compound_statement_begin(); + break; case TensorSamplerAddressModeY::ClampToNearest: - max = _mapper.tensor_component_y(); - coord = "clamp(" + coord + ", 0, " + max + " - 1)"; - break; + max = _mapper.tensor_component_y(); + coord = "clamp(" + coord + ", 0, " + max + " - 1)"; + break; case TensorSamplerAddressModeY::ClampToMaxEdgeOnly: - max = _mapper.tensor_component_y(); - coord = "min(" + coord + ", " + max + " - 1)"; - break; + max = _mapper.tensor_component_y(); + coord = "min(" + coord + ", " + max + " - 1)"; + break; case TensorSamplerAddressModeY::ClampToMinEdgeOnly: - coord = "max(" + coord + ", 0)"; - break; + coord = "max(" + coord + ", 0)"; + break; case TensorSamplerAddressModeY::None: - break; + break; default: - std::cout << "Unsupported address mode for write_out_of_bound_check_yz" << std::endl; - assert(false); + std::cout << "Unsupported address mode for write_out_of_bound_check_yz" << std::endl; + assert(false); } }; - void out_of_bound_finalize_y(const std::string& dst) + void out_of_bound_finalize_y(const std::string &dst) { const auto address_mode_y = _mapper.gpu_sampler().address_mode_y; @@ -2627,8 +2746,8 @@ private: case TensorSamplerAddressModeY::Skip: case TensorSamplerAddressModeY::SkipMaxEdgeOnly: case TensorSamplerAddressModeY::SkipMinEdgeOnly: - _writer->compound_statement_end(); - break; + _writer->compound_statement_end(); + break; default: assert(false); @@ -2639,19 +2758,19 @@ private: case TensorSamplerAddressModeY::ClampToBorder: case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly: case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly: - _writer->write_text("else\n"); - _writer->compound_statement_begin(); - _writer->write_text(dst); - _writer->write_text(" 
= 0.0f;\n"); - _writer->compound_statement_end(); - break; + _writer->write_text("else\n"); + _writer->compound_statement_begin(); + _writer->write_text(dst); + _writer->write_text(" = 0.0f;\n"); + _writer->compound_statement_end(); + break; default: assert(false); } }; - void out_of_bound_initialize_z(std::string& coord) + void out_of_bound_initialize_z(std::string &coord) { std::string max = ""; @@ -2660,35 +2779,35 @@ private: switch(address_mode_z) { case TensorSamplerAddressModeZ::Skip: - max = _mapper.tensor_component_z(); - _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n"); - _writer->compound_statement_begin(); - break; + max = _mapper.tensor_component_z(); + _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n"); + _writer->compound_statement_begin(); + break; case TensorSamplerAddressModeZ::SkipMinEdgeOnly: - _writer->write_text("if(" + coord + " >= 0)\n"); - _writer->compound_statement_begin(); - break; + _writer->write_text("if(" + coord + " >= 0)\n"); + _writer->compound_statement_begin(); + break; case TensorSamplerAddressModeZ::SkipMaxEdgeOnly: - max = _mapper.tensor_component_z(); - _writer->write_text("if(" + coord + " < " + max + ")\n"); - _writer->compound_statement_begin(); - break; + max = _mapper.tensor_component_z(); + _writer->write_text("if(" + coord + " < " + max + ")\n"); + _writer->compound_statement_begin(); + break; case TensorSamplerAddressModeZ::ClampToNearest: - max = _mapper.tensor_component_z(); - coord = "clamp(" + coord + ", 0, " + max + " - 1)"; - break; + max = _mapper.tensor_component_z(); + coord = "clamp(" + coord + ", 0, " + max + " - 1)"; + break; case TensorSamplerAddressModeZ::ClampToMaxEdgeOnly: - max = _mapper.tensor_component_z(); - coord = "min(" + coord + ", " + max + " - 1)"; - break; + max = _mapper.tensor_component_z(); + coord = "min(" + coord + ", " + max + " - 1)"; + break; case TensorSamplerAddressModeZ::ClampToMinEdgeOnly: - coord = "max(" + coord + ", 0)"; - break; + coord = "max(" + coord + ", 0)"; + break; case TensorSamplerAddressModeZ::None: - break; + break; default: - std::cout << "Unsupported address mode for write_out_of_bound_check_yz" << std::endl; - assert(false); + std::cout << "Unsupported address mode for write_out_of_bound_check_yz" << std::endl; + assert(false); } }; @@ -2701,8 +2820,8 @@ private: case TensorSamplerAddressModeZ::Skip: case TensorSamplerAddressModeZ::SkipMinEdgeOnly: case TensorSamplerAddressModeZ::SkipMaxEdgeOnly: - _writer->compound_statement_end(); - break; + _writer->compound_statement_end(); + break; default: assert(false); @@ -2775,43 +2894,45 @@ private: return x; } - std::string to_ls_buffer(GpuLoadStoreType type, int32_t vector_width, const std::string& data, const std::string& address) + std::string to_ls_buffer(GpuLoadStoreType type, int32_t vector_width, const std::string &data, + const std::string &address) { switch(type) { case GpuLoadStoreType::Load: - if(vector_width != 1) - { - return data + " = vload" + std::to_string(vector_width) + "(0, " + address + ")"; - } - else - { - return data + " = *(" + address + ")"; - } - break; + if(vector_width != 1) + { + return data + " = vload" + std::to_string(vector_width) + "(0, " + address + ")"; + } + else + { + return data + " = *(" + address + ")"; + } + break; case GpuLoadStoreType::Store: - if(vector_width != 1) - { - return "vstore" + std::to_string(vector_width) + "(" + data + ", 0, " + address + ")"; - } - else - { - return "*(" + address + ") = " + data; - } - break; + 
if(vector_width != 1) + { + return "vstore" + std::to_string(vector_width) + "(" + data + ", 0, " + address + ")"; + } + else + { + return "*(" + address + ") = " + data; + } + break; default: - std::cout << "Unsupported GpuLoadStoreType" << std::endl; - assert(false); - return ""; + std::cout << "Unsupported GpuLoadStoreType" << std::endl; + assert(false); + return ""; } } - std::string to_ls_buffer_address(const std::string& x, const std::string& y, const std::string& z, const std::string& b) const + std::string to_ls_buffer_address(const std::string &x, const std::string &y, const std::string &z, + const std::string &b) const { - auto tensor_storage = static_cast(_mapper.gpu_sampler().storage); + auto tensor_storage = static_cast(_mapper.gpu_sampler().storage); assert(tensor_storage == GpuTensorStorage::BufferUint8Ptr); - const std::string ptr_buf = _mapper.tensor_argument()->storage(tensor_storage); - const std::string dst_type = get_cl_data_type(_dst->format().dt, 1); + const std::string ptr_buf = _mapper.tensor_argument()->storage(tensor_storage); + const std::string dst_type = get_cl_data_type(_dst->format().dt, 1); std::string address; address += "(__global "; @@ -2855,7 +2976,8 @@ private: class ClLoadStoreImage2dHelperWriter : public IGpuLoadStoreHelperWriter { public: - static bool validate(IGpuKernelWriter *x, const GpuTensor3dMapper& mapper, GpuLoadStoreType type, IVectorTile *dst) + static bool + validate(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type, IVectorTile *dst) { CKW_UNUSED(x); @@ -2889,11 +3011,14 @@ public: - z: Only GpuSamplerAddressModeZ::None is supported */ } - ClLoadStoreImage2dHelperWriter(IGpuKernelWriter *x, const GpuTensor3dMapper& mapper, GpuLoadStoreType type) : IGpuLoadStoreHelperWriter(x, mapper, type) + + ClLoadStoreImage2dHelperWriter(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type) + : IGpuLoadStoreHelperWriter(x, mapper, type) { } ClLoadStoreImage2dHelperWriter(const ClLoadStoreImage2dHelperWriter &) = default; + ClLoadStoreImage2dHelperWriter &operator=(const ClLoadStoreImage2dHelperWriter &) = default; void initialize(IVectorTile *dst, IVectorTile *x, IVectorTile *z, IVectorTile *b) override @@ -2918,7 +3043,7 @@ public: */ } - void write(const std::pair& y) override + void write(const std::pair &y) override { int32_t idx_y = y.first; std::string coord_y = y.second; @@ -2940,14 +3065,15 @@ public: void finalize() override { } + private: - IVectorTile* _dst { nullptr }; - int32_t _ls_width_full { 0 }; - std::string _coord_x {}; - std::string _coord_z {}; - std::string _coord_b {}; + IVectorTile *_dst{ nullptr }; + int32_t _ls_width_full{ 0 }; + std::string _coord_x{}; + std::string _coord_z{}; + std::string _coord_b{}; - void out_of_bound_initialize_y(std::string& coord) + void out_of_bound_initialize_y(std::string &coord) { std::string max = ""; @@ -2956,19 +3082,19 @@ private: switch(address_mode_y) { case TensorSamplerAddressModeY::Skip: - max = _mapper.tensor_component_y(); - _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n"); - _writer->compound_statement_begin(); - break; + max = _mapper.tensor_component_y(); + _writer->write_text("if((" + coord + " >= 0) && (" + coord + " < " + max + "))\n"); + _writer->compound_statement_begin(); + break; case TensorSamplerAddressModeY::SkipMinEdgeOnly: - _writer->write_text("if(" + coord + " >= 0)\n"); - _writer->compound_statement_begin(); - break; + _writer->write_text("if(" + coord + " >= 0)\n"); + 
_writer->compound_statement_begin(); + break; case TensorSamplerAddressModeY::SkipMaxEdgeOnly: - max = _mapper.tensor_component_y(); - _writer->write_text("if(" + coord + " < " + max + ")\n"); - _writer->compound_statement_begin(); - break; + max = _mapper.tensor_component_y(); + _writer->write_text("if(" + coord + " < " + max + ")\n"); + _writer->compound_statement_begin(); + break; case TensorSamplerAddressModeY::ClampToBorder: case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly: case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly: @@ -2976,14 +3102,14 @@ private: case TensorSamplerAddressModeY::ClampToMaxEdgeOnly: case TensorSamplerAddressModeY::ClampToMinEdgeOnly: case TensorSamplerAddressModeY::None: - break; + break; default: - std::cout << "Unsupported address mode for write_out_of_bound_check_y" << std::endl; - assert(false); + std::cout << "Unsupported address mode for write_out_of_bound_check_y" << std::endl; + assert(false); } }; - void out_of_bound_finalize_y(const std::string& dst) + void out_of_bound_finalize_y(const std::string &dst) { CKW_UNUSED(dst); @@ -2994,35 +3120,36 @@ private: case TensorSamplerAddressModeY::Skip: case TensorSamplerAddressModeY::SkipMinEdgeOnly: case TensorSamplerAddressModeY::SkipMaxEdgeOnly: - _writer->compound_statement_end(); - break; + _writer->compound_statement_end(); + break; default: assert(false); } }; - std::string to_ls_image2d(GpuLoadStoreType type, int32_t vector_width, const std::string& data, const std::string& sampler, const std::string& coord) + std::string to_ls_image2d(GpuLoadStoreType type, int32_t vector_width, const std::string &data, + const std::string &sampler, const std::string &coord) { CKW_UNUSED(vector_width); - auto tensor_storage = static_cast(_mapper.gpu_sampler().storage); - const std::string image2d_obj = _mapper.tensor_argument()->storage(tensor_storage); + auto tensor_storage = static_cast(_mapper.gpu_sampler().storage); + const std::string image2d_obj = _mapper.tensor_argument()->storage(tensor_storage); // const DataType dt = _dst->format().dt; - const std::string post_fix = _dst->format().dt == DataType::Fp32? "f" : "h"; + const std::string post_fix = _dst->format().dt == DataType::Fp32 ? 
"f" : "h"; switch(type) { case GpuLoadStoreType::Load: - return data + " = read_image" + post_fix + "(" + image2d_obj + ", " + sampler + ", " + coord + ")"; - break; + return data + " = read_image" + post_fix + "(" + image2d_obj + ", " + sampler + ", " + coord + ")"; + break; case GpuLoadStoreType::Store: - return "write_image" + post_fix + "(" + image2d_obj + ", " + coord + ", " + data + ")"; + return "write_image" + post_fix + "(" + image2d_obj + ", " + coord + ", " + data + ")"; default: - assert(false); - std::cout << "Unsupported GpuLoadStoreType" << std::endl; - assert(false); - return ""; + assert(false); + std::cout << "Unsupported GpuLoadStoreType" << std::endl; + assert(false); + return ""; } } @@ -3033,26 +3160,27 @@ private: switch(address_mode_y) { case TensorSamplerAddressModeY::None: - return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST"; + return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_NONE | CLK_FILTER_NEAREST"; case TensorSamplerAddressModeY::Skip: case TensorSamplerAddressModeY::SkipMinEdgeOnly: case TensorSamplerAddressModeY::SkipMaxEdgeOnly: case TensorSamplerAddressModeY::ClampToBorder: case TensorSamplerAddressModeY::ClampToBorderMinEdgeOnly: case TensorSamplerAddressModeY::ClampToBorderMaxEdgeOnly: - return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST"; + return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP | CLK_FILTER_NEAREST"; case TensorSamplerAddressModeY::ClampToNearest: case TensorSamplerAddressModeY::ClampToMaxEdgeOnly: case TensorSamplerAddressModeY::ClampToMinEdgeOnly: - return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST"; + return "CLK_NORMALIZED_COORDS_FALSE | CLK_ADDRESS_CLAMP_TO_EDGE | CLK_FILTER_NEAREST"; default: - std::cout << "Unsupported address_mode_coord" << std::endl; - assert(false); - return ""; + std::cout << "Unsupported address_mode_coord" << std::endl; + assert(false); + return ""; } } - std::string to_ls_image2d_coord(const std::string& x, const std::string& y, const std::string& z, const std::string& b) const + std::string to_ls_image2d_coord(const std::string &x, const std::string &y, const std::string &z, + const std::string &b) const { std::string coord_x = "(" + x + ") >> 2"; std::string coord_y = "("; @@ -3094,7 +3222,8 @@ public: * * @return IGpuLoadStoreHelperWriter */ - static std::unique_ptr create(IGpuKernelWriter *x, const GpuTensor3dMapper& mapper, GpuLoadStoreType type) + static std::unique_ptr + create(IGpuKernelWriter *x, const GpuTensor3dMapper &mapper, GpuLoadStoreType type) { const auto tensor_storage = mapper.gpu_sampler().storage; switch(tensor_storage) @@ -3113,7 +3242,7 @@ public: }; // This utility method needs to go in utils.h -inline bool is_tile_scalar(IVectorTile* x) +inline bool is_tile_scalar(IVectorTile *x) { return x->format().w == 1 && x->format().h == 1; } @@ -3128,6 +3257,7 @@ public: } ClKernelWriter(const ClKernelWriter &) = default; + ClKernelWriter &operator=(const ClKernelWriter &) = default; // A IdSpaced ID is a term used to describe a fragment that is registered in ICode to ensure @@ -3138,18 +3268,18 @@ public: _data->arguments.set_IdSpace(id); } - void import_tile(const std::string& dst_name, const IVectorTile *src) override + void import_tile(const std::string &dst_name, const IVectorTile *src) override { _data->tiles.insert(dst_name, src); } - void declare_argument(const std::string& name, const TensorInfo& tensor) override + void declare_argument(const std::string &name, const TensorInfo &tensor) 
override { assert(_data->arguments[name] == nullptr); _data->arguments.insert(name, tensor, _attr->return_tensor_component_by_value); } - void declare_tile(const std::string& name, const TileInfo& format) override + void declare_tile(const std::string &name, const TileInfo &format) override { assert(_data->tiles[name] == nullptr); _data->tiles.insert(name, format); @@ -3162,14 +3292,15 @@ public: } } - void declare_const_tile(const std::string& name, const std::vector>& in, DataType dt) override + void declare_const_tile(const std::string &name, const std::vector> &in, + DataType dt) override { assert(_data->tiles[name] == nullptr); _data->tiles.insert(name, in, dt); // Note: A constant does not need to be declared in the code } - void write_text(const std::string& x) override + void write_text(const std::string &x) override { _data->code += x; } @@ -3186,12 +3317,11 @@ public: _data->code += "}\n"; } - void op_get_global_id(const Operand& dst_var, int32_t dim) override + void op_get_global_id(const Operand &dst_var, int32_t dim) override { assert(dst_var.type() == OperandType::Tile); assert(_data->tiles.has_tile(dst_var.value())); - assert(_data->tiles[dst_var.value()]->format().w == 1 && - _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable + assert(_data->tiles[dst_var.value()]->format().w == 1 && _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable auto var = _data->tiles[dst_var.value()]; @@ -3201,96 +3331,96 @@ public: _data->code += ");\n"; }; - void op_get_global_coord(const Operand& o_dst, const Operand& o_step, const TensorOperand& o_tensor, int32_t dim) override + void op_get_global_coord(const Operand &o_dst, const Operand &o_step, const TensorOperand &o_tensor, + int32_t dim) override { OperandUnpacker operands(_data->tiles, _data->arguments); - auto dst = operands.unpack(o_dst); - auto step = operands.unpack(o_step); + auto dst = operands.unpack(o_dst); + auto step = operands.unpack(o_step); // Validation: Check that x, y and z are scalar TensorOperandUnpacker tensor_operands(_data->arguments); - auto tensor = tensor_operands.unpack(o_tensor); - auto gpu_sampler = o_tensor.sampler(); + auto tensor = tensor_operands.unpack(o_tensor); + auto gpu_sampler = o_tensor.sampler(); GpuTensor3dMapper mapper(tensor, gpu_sampler); - switch (dim) + switch(dim) { - case 0: - if(mapper.is_one_component_x()) - { - _data->code += dst->scalar(0, 0).str; - _data->code += " = 0;\n"; - } - else - { - if(mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin) + case 0: + if(mapper.is_one_component_x()) { - // Validation: Check: fixed tensor shape - // TO BE CHANGED _data->code += dst->scalar(0, 0).str; - _data->code += " = get_global_id(0) * "; - _data->code += step->scalar(0, 0).str; - _data->code += ";\n"; + _data->code += " = 0;\n"; } else + { + if(mapper.gpu_sampler().address_mode_x == TensorSamplerAddressModeX::OverlappingMin) + { + // Validation: Check: fixed tensor shape + // TO BE CHANGED + _data->code += dst->scalar(0, 0).str; + _data->code += " = get_global_id(0) * "; + _data->code += step->scalar(0, 0).str; + _data->code += ";\n"; + } + else + { + _data->code += dst->scalar(0, 0).str; + _data->code += " = get_global_id(0) * "; + _data->code += step->scalar(0, 0).str; + _data->code += ";\n"; + } + } + break; + case 1: + if(mapper.is_one_component_y()) { _data->code += dst->scalar(0, 0).str; - _data->code += " = get_global_id(0) * "; - _data->code += step->scalar(0, 0).str; - _data->code += ";\n"; + 
_data->code += " = 0;\n"; } - } - break; - case 1: - if(mapper.is_one_component_y()) - { - _data->code += dst->scalar(0, 0).str; - _data->code += " = 0;\n"; - } - else - { - if(mapper.gpu_sampler().address_mode_y == TensorSamplerAddressModeY::OverlappingMin) + else { - + if(mapper.gpu_sampler().address_mode_y == TensorSamplerAddressModeY::OverlappingMin) + { + } + else + { + _data->code += dst->scalar(0, 0).str; + _data->code += " = get_global_id(1) * "; + _data->code += step->scalar(0, 0).str; + _data->code += ";\n"; + } + } + break; + case 2: + if(mapper.is_one_component_z()) + { + _data->code += dst->scalar(0, 0).str; + _data->code += " = 0;\n"; } else { _data->code += dst->scalar(0, 0).str; - _data->code += " = get_global_id(1) * "; + _data->code += " = get_global_id(2) * "; _data->code += step->scalar(0, 0).str; _data->code += ";\n"; } - } - break; - case 2: - if(mapper.is_one_component_z()) - { - _data->code += dst->scalar(0, 0).str; - _data->code += " = 0;\n"; - } - else - { - _data->code += dst->scalar(0, 0).str; - _data->code += " = get_global_id(2) * "; - _data->code += step->scalar(0, 0).str; - _data->code += ";\n"; - } - break; - default: - break; + break; + default: + break; } }; - void op_get_global_batch(const Operand& o_dst, const TensorOperand& o_tensor) override + void op_get_global_batch(const Operand &o_dst, const TensorOperand &o_tensor) override { OperandUnpacker operands(_data->tiles, _data->arguments); - auto dst = operands.unpack(o_dst); + auto dst = operands.unpack(o_dst); TensorOperandUnpacker tensor_operands(_data->arguments); - auto tensor = tensor_operands.unpack(o_tensor); - auto gpu_sampler = o_tensor.sampler(); + auto tensor = tensor_operands.unpack(o_tensor); + auto gpu_sampler = o_tensor.sampler(); GpuTensor3dMapper mapper(tensor, gpu_sampler); @@ -3306,12 +3436,11 @@ public: } }; - void op_get_global_size(const Operand& dst_var, int32_t dim) override + void op_get_global_size(const Operand &dst_var, int32_t dim) override { assert(dst_var.type() == OperandType::Tile); assert(_data->tiles.has_tile(dst_var.value())); - assert(_data->tiles[dst_var.value()]->format().w == 1 && - _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable + assert(_data->tiles[dst_var.value()]->format().w == 1 && _data->tiles[dst_var.value()]->format().h == 1); // It must be a scalar variable auto var = _data->tiles[dst_var.value()]; @@ -3321,12 +3450,13 @@ public: _data->code += ");\n"; } - void op_binary_expression(const Operand& dst_name, const Operand& lhs_name, BinaryOp op, const Operand& rhs_name) override + void op_binary_expression(const Operand &dst_name, const Operand &lhs_name, BinaryOp op, + const Operand &rhs_name) override { OperandUnpacker operands(_data->tiles, _data->arguments); - auto lhs = operands.unpack(lhs_name); - auto rhs = operands.unpack(rhs_name); - auto dst = operands.unpack(dst_name); + auto lhs = operands.unpack(lhs_name); + auto rhs = operands.unpack(rhs_name); + auto dst = operands.unpack(dst_name); const int32_t dst_w = dst->format().w; const int32_t dst_h = dst->format().h; @@ -3361,8 +3491,8 @@ public: bool broadcast_lhs_x = dst_w != 1 && lhs_w == 1; bool broadcast_rhs_x = dst_w != 1 && rhs_w == 1; - std::string lhs_prefix = broadcast_lhs_x? "(" + dst->underlying_source_variables()[0].type.str + ")" : ""; - std::string rhs_prefix = broadcast_rhs_x? "(" + dst->underlying_source_variables()[0].type.str + ")" : ""; + std::string lhs_prefix = broadcast_lhs_x ? 
"(" + dst->underlying_source_variables()[0].type.str + ")" : ""; + std::string rhs_prefix = broadcast_rhs_x ? "(" + dst->underlying_source_variables()[0].type.str + ")" : ""; std::string op_str = to_string(op); // Broadcasting on Y is automatic @@ -3379,17 +3509,17 @@ public: } }; - void op_cast_expression(const Operand& o_dst, const Operand &o_src, ConvertPolicy policy) override + void op_cast_expression(const Operand &o_dst, const Operand &o_src, ConvertPolicy policy) override { CKW_UNUSED(policy); OperandUnpacker operands(_data->tiles, _data->arguments); - auto src = operands.unpack(o_src); - auto dst = operands.unpack(o_dst); + auto src = operands.unpack(o_src); + auto dst = operands.unpack(o_dst); // const int32_t dst_w = dst->format().w; - const int32_t dst_h = dst->format().h; - const std::string dt = dst->scalar(0, 0).type.str; + const int32_t dst_h = dst->format().h; + const std::string dt = dst->scalar(0, 0).type.str; // Broadcasting on Y is automatic for(int32_t y = 0; y < dst_h; ++y) @@ -3401,21 +3531,21 @@ public: } }; - void op_assign(const Operand& dst_name, const Operand& src_name) override + void op_assign(const Operand &dst_name, const Operand &src_name) override { OperandUnpacker operands(_data->tiles, _data->arguments); - auto src = operands.unpack(src_name); - auto dst = operands.unpack(dst_name); + auto src = operands.unpack(src_name); + auto dst = operands.unpack(dst_name); - const int32_t dst_w = dst->format().w; - const int32_t dst_h = dst->format().h; - const int32_t src_w = src->format().w; + const int32_t dst_w = dst->format().w; + const int32_t dst_h = dst->format().h; + const int32_t src_w = src->format().w; // const int32_t src_h = src->format().h; const std::string dt = dst->scalar(0, 0).type.str; bool broadcast_src_x = dst_w != 1 && src_w == 1; - std::string src_prefix = broadcast_src_x? "(" + dt + ")" : ""; + std::string src_prefix = broadcast_src_x ? "(" + dt + ")" : ""; // Broadcasting on Y is automatic for(int32_t y = 0; y < dst_h; ++y) @@ -3427,21 +3557,22 @@ public: } } - void op_scalar_function(const Operand& dst_name, const Operand& src_name, ScalarUnaryFunction func) override + void + op_scalar_function(const Operand &dst_name, const Operand &src_name, ScalarUnaryFunction func) override { OperandUnpacker operands(_data->tiles, _data->arguments); - auto src = operands.unpack(src_name); - auto dst = operands.unpack(dst_name); + auto src = operands.unpack(src_name); + auto dst = operands.unpack(dst_name); - const int32_t dst_w = dst->format().w; - const int32_t dst_h = dst->format().h; - const int32_t src_w = src->format().w; + const int32_t dst_w = dst->format().w; + const int32_t dst_h = dst->format().h; + const int32_t src_w = src->format().w; // const int32_t src_h = src->format().h; const std::string dt = dst->scalar(0, 0).type.str; bool broadcast_src_x = dst_w != 1 && src_w == 1; - std::string src_prefix = broadcast_src_x? "(" + dt + ")" : ""; + std::string src_prefix = broadcast_src_x ? 
"(" + dt + ")" : ""; // Broadcasting on Y is automatic for(int32_t y = 0; y < dst_h; ++y) @@ -3464,11 +3595,11 @@ public: } } - void op_if(const Operand& o_lhs, BinaryOp op, const Operand& o_rhs) override + void op_if(const Operand &o_lhs, BinaryOp op, const Operand &o_rhs) override { OperandUnpacker operands(_data->tiles, _data->arguments); - auto lhs = operands.unpack(o_lhs); - auto rhs = operands.unpack(o_rhs); + auto lhs = operands.unpack(o_lhs); + auto rhs = operands.unpack(o_rhs); assert(is_tile_scalar(lhs)); assert(is_tile_scalar(rhs)); @@ -3482,12 +3613,13 @@ public: _data->code += ")\n"; } - void op_for_loop(const Operand& var_name, BinaryOp cond_op, const Operand& cond_value_name, AssignmentOp update_op, const Operand& update_value_name) override + void op_for_loop(const Operand &var_name, BinaryOp cond_op, const Operand &cond_value_name, + AssignmentOp update_op, const Operand &update_value_name) override { OperandUnpacker operands(_data->tiles, _data->arguments); - auto var = operands.unpack(var_name); - auto cond_value = operands.unpack(cond_value_name); - auto update_value = operands.unpack(update_value_name); + auto var = operands.unpack(var_name); + auto cond_value = operands.unpack(cond_value_name); + auto update_value = operands.unpack(update_value_name); const int32_t dst_w = var->format().w; const int32_t dst_h = var->format().h; @@ -3497,7 +3629,7 @@ public: assert(dst_w == 1); assert(dst_h == 1); - _data->code += "for(; " ; + _data->code += "for(; "; _data->code += var->scalar(0, 0).str; _data->code += " "; _data->code += to_string(cond_op); @@ -3509,19 +3641,21 @@ public: _data->code += "\n"; } - void op_load_immediate(const TensorOperand& o_tensor, const Operand& o_dst, const Operand& o_x, const Operand& o_y, const Operand& o_z, const Operand& o_batch_idx, const Operand& dilation_y) override + void op_load_immediate(const TensorOperand &o_tensor, const Operand &o_dst, const Operand &o_x, + const Operand &o_y, const Operand &o_z, const Operand &o_batch_idx, + const Operand &dilation_y) override { OperandUnpacker operands(_data->tiles, _data->arguments); - auto dst = operands.unpack(o_dst); - auto x = operands.unpack(o_x); - auto y = operands.unpack(o_y); - auto z = operands.unpack(o_z); - auto dil_y = operands.unpack(dilation_y); - auto b = operands.unpack(o_batch_idx); + auto dst = operands.unpack(o_dst); + auto x = operands.unpack(o_x); + auto y = operands.unpack(o_y); + auto z = operands.unpack(o_z); + auto dil_y = operands.unpack(dilation_y); + auto b = operands.unpack(o_batch_idx); TensorOperandUnpacker tensor_operands(_data->arguments); - auto tensor = tensor_operands.unpack(o_tensor); - auto gpu_sampler = o_tensor.sampler(); + auto tensor = tensor_operands.unpack(o_tensor); + auto gpu_sampler = o_tensor.sampler(); GpuTensor3dMapper mapper(tensor, gpu_sampler); @@ -3543,18 +3677,20 @@ public: load_writer->finalize(); } - void op_load_indirect(const TensorOperand& o_tensor, const Operand& o_dst, const Operand& o_x, const Operand& o_indirect_h, const Operand& o_z, const Operand& o_batch_idx) override + void op_load_indirect(const TensorOperand &o_tensor, const Operand &o_dst, const Operand &o_x, + const Operand &o_indirect_h, const Operand &o_z, + const Operand &o_batch_idx) override { OperandUnpacker operands(_data->tiles, _data->arguments); - auto dst = operands.unpack(o_dst); - auto x = operands.unpack(o_x); - auto y_ind = operands.unpack(o_indirect_h); - auto z = operands.unpack(o_z); - auto b = operands.unpack(o_batch_idx); + auto dst = 
operands.unpack(o_dst); + auto x = operands.unpack(o_x); + auto y_ind = operands.unpack(o_indirect_h); + auto z = operands.unpack(o_z); + auto b = operands.unpack(o_batch_idx); TensorOperandUnpacker tensor_operands(_data->arguments); - auto tensor = tensor_operands.unpack(o_tensor); - auto gpu_sampler = o_tensor.sampler(); + auto tensor = tensor_operands.unpack(o_tensor); + auto gpu_sampler = o_tensor.sampler(); GpuTensor3dMapper mapper(tensor, gpu_sampler); @@ -3571,18 +3707,20 @@ public: load_writer->finalize(); } - void op_store_immediate(const TensorOperand& tensor_name, const Operand& src_name, const Operand& x_name, const Operand& y_name, const Operand& z_name, const Operand& batch_index_name) override + void op_store_immediate(const TensorOperand &tensor_name, const Operand &src_name, const Operand &x_name, + const Operand &y_name, const Operand &z_name, + const Operand &batch_index_name) override { OperandUnpacker operands(_data->tiles, _data->arguments); - auto src = operands.unpack(src_name); - auto x = operands.unpack(x_name); - auto y = operands.unpack(y_name); - auto z = operands.unpack(z_name); - auto b = operands.unpack(batch_index_name); + auto src = operands.unpack(src_name); + auto x = operands.unpack(x_name); + auto y = operands.unpack(y_name); + auto z = operands.unpack(z_name); + auto b = operands.unpack(batch_index_name); TensorOperandUnpacker tensor_operands(_data->arguments); - auto tensor = tensor_operands.unpack(tensor_name); - auto gpu_sampler = tensor_name.sampler(); + auto tensor = tensor_operands.unpack(tensor_name); + auto gpu_sampler = tensor_name.sampler(); GpuTensor3dMapper mapper(tensor, gpu_sampler); @@ -3606,17 +3744,18 @@ public: _data->code += "return;\n"; } - void util_get_indirect_buffer(const Operand& o_dst, const TensorOperand& o_tensor, const Operand& o_x, const Operand& o_y, const Operand& o_x_off, const Operand& o_y_off) override + void util_get_indirect_buffer(const Operand &o_dst, const TensorOperand &o_tensor, const Operand &o_x, + const Operand &o_y, const Operand &o_x_off, const Operand &o_y_off) override { OperandUnpacker operands(_data->tiles, _data->arguments); - auto dst = operands.unpack(o_dst); - auto x = operands.unpack(o_x); - auto y = operands.unpack(o_y); - auto x_off = operands.unpack(o_x_off); - auto y_off = operands.unpack(o_y_off); + auto dst = operands.unpack(o_dst); + auto x = operands.unpack(o_x); + auto y = operands.unpack(o_y); + auto x_off = operands.unpack(o_x_off); + auto y_off = operands.unpack(o_y_off); TensorOperandUnpacker tensor_operands(_data->arguments); - auto tensor = tensor_operands.unpack(o_tensor); + auto tensor = tensor_operands.unpack(o_tensor); assert(dst->format().w == 1); assert(x->format().w == 1); @@ -3706,8 +3845,8 @@ public: } private: - GpuKernelWriterDataHolder* _data { nullptr }; - GpuKernelWriterAttribute * _attr { nullptr }; + GpuKernelWriterDataHolder *_data{ nullptr }; + GpuKernelWriterAttribute *_attr{ nullptr }; }; /** IGpuKernelWriter factory class */ @@ -3720,7 +3859,8 @@ public: * * @return IGpuKernelWriter */ - static std::unique_ptr create(GpuKernelWriterAttribute *attr, GpuKernelWriterDataHolder *x) + static std::unique_ptr + create(GpuKernelWriterAttribute *attr, GpuKernelWriterDataHolder *x) { switch(x->programming_language()) { @@ -3734,28 +3874,29 @@ public: } }; -inline int32_t adjust_step(TensorSamplerFormat tensor_format, int32_t step, const TensorInfo *tensor_info_id, int32_t idx) +inline int32_t +adjust_step(TensorSamplerFormat tensor_format, int32_t step, const TensorInfo 
*tensor_info_id, int32_t idx) { auto tensor = tensor_info_id->shape; - int32_t dim[3] = {0}; + int32_t dim[3] = { 0 }; switch(tensor_format) { case TensorSamplerFormat::C_W_H: - dim[0] = tensor[0]; - dim[1] = tensor[1]; - dim[2] = tensor[2]; - break; + dim[0] = tensor[0]; + dim[1] = tensor[1]; + dim[2] = tensor[2]; + break; case TensorSamplerFormat::C_WH_1: - dim[0] = tensor[0]; - dim[1] = tensor[1] * tensor[2]; - dim[2] = 1; - break; + dim[0] = tensor[0]; + dim[1] = tensor[1] * tensor[2]; + dim[2] = 1; + break; default: - std::cout << "Unsupported tensor format" << std::endl; - assert(false); - break; + std::cout << "Unsupported tensor format" << std::endl; + assert(false); + break; } return std::min(step, dim[idx]); diff --git a/compute_kernel_writer/prototype/src/TileInfo.cpp b/compute_kernel_writer/prototype/src/TileInfo.cpp index 7d8b2654ef..66d8cb1620 100644 --- a/compute_kernel_writer/prototype/src/TileInfo.cpp +++ b/compute_kernel_writer/prototype/src/TileInfo.cpp @@ -27,17 +27,17 @@ namespace ckw { TileInfo::TileInfo(DataType dt) - : _dt(dt), _shape({{1, 1}}) + : _dt(dt), _shape({ { 1, 1 } }) { } TileInfo::TileInfo(DataType dt, int32_t w) - : _dt(dt), _shape({{w, 1}}) + : _dt(dt), _shape({ { w, 1 } }) { } TileInfo::TileInfo(DataType dt, int32_t h, int32_t w) - : _dt(dt), _shape({{w, h}}) + : _dt(dt), _shape({ { w, h } }) { } diff --git a/compute_kernel_writer/prototype/src/TileOperand.cpp b/compute_kernel_writer/prototype/src/TileOperand.cpp index 091947628d..fcb3cb6415 100644 --- a/compute_kernel_writer/prototype/src/TileOperand.cpp +++ b/compute_kernel_writer/prototype/src/TileOperand.cpp @@ -58,7 +58,8 @@ prototype::Operand TileOperand::create_impl_operand(prototype::IGpuKernelWriter switch(_info.data_type()) { case DataType::Int32: - return prototype::Operand(std::to_string(_value.get()), prototype::OperandType::ScalarInt32); + return prototype::Operand(std::to_string(_value.get()), + prototype::OperandType::ScalarInt32); case DataType::Fp32: return prototype::Operand(std::to_string(_value.get()), prototype::OperandType::ScalarFp32); diff --git a/compute_kernel_writer/src/Error.cpp b/compute_kernel_writer/src/Error.cpp index 7f2fb41187..c5dae2eb75 100644 --- a/compute_kernel_writer/src/Error.cpp +++ b/compute_kernel_writer/src/Error.cpp @@ -28,7 +28,8 @@ namespace ckw { -std::string create_error_msg(const std::string &file, const std::string &func, const std::string &line, const std::string &msg) +std::string create_error_msg(const std::string &file, const std::string &func, const std::string &line, + const std::string &msg) { std::string err; err += "[COMPUTE_KERNEL_WRITER][ERROR]:"; diff --git a/compute_kernel_writer/src/Helpers.h b/compute_kernel_writer/src/Helpers.h index f7ba7cec1c..16c06d60e7 100644 --- a/compute_kernel_writer/src/Helpers.h +++ b/compute_kernel_writer/src/Helpers.h @@ -48,9 +48,9 @@ std::string dec_to_hex_as_string(int32_t dec); * @return the clamped value */ template -T clamp(const T& val, const T& min_val, const T& max_val) +T clamp(const T &val, const T &min_val, const T &max_val) { return std::max(min_val, std::min(val, max_val)); } -} +} // namespace ckw #endif /* COMPUTE_KERNEL_WRITER_SRC_HELPERS_H */ diff --git a/compute_kernel_writer/src/ITile.h b/compute_kernel_writer/src/ITile.h index ef0df2116a..c036907561 100644 --- a/compute_kernel_writer/src/ITile.h +++ b/compute_kernel_writer/src/ITile.h @@ -37,15 +37,15 @@ using TileContainer = std::vector>; /** Tile descriptor which reports the underlying datatype and vector length */ struct 
TileVariableDescriptor { - DataType dt { DataType::Unknown }; /** Data type */ - int32_t len { 1 }; /** Number of elements in a single variable. For example, 1 for scalar */ + DataType dt{ DataType::Unknown }; /** Data type */ + int32_t len{ 1 }; /** Number of elements in a single variable. For example, 1 for scalar */ }; /** Tile variable */ struct TileVariable { - std::string str {""}; /** Tile variable as a string */ - TileVariableDescriptor desc {}; /** Tile value descriptor which reports the datatype and vector length */ + std::string str{ "" }; /** Tile variable as a string */ + TileVariableDescriptor desc{}; /** Tile value descriptor which reports the datatype and vector length */ }; /** Tile base class. @@ -55,11 +55,13 @@ class ITile { public: virtual ~ITile() = default; + /** Method to get all TileVariable objects * * @return a vector containing all @ref TileVariable objects */ virtual std::vector all() const = 0; + /** Method to get the name of the tile. * * @return the name of the tile @@ -68,6 +70,7 @@ public: { return _basename; } + /** Method to get the tile info * * @return the @ref TileInfo @@ -76,6 +79,7 @@ public: { return _info; } + /** Method to know whether the tile is assignable or not. * For example, a constant tile is not assignable. * @@ -84,8 +88,8 @@ public: virtual bool is_assignable() const = 0; protected: - TileInfo _info { DataType::Unknown }; // Tile info - std::string _basename { "" }; // Tile name + TileInfo _info{ DataType::Unknown }; // Tile info + std::string _basename{ "" }; // Tile name }; /** Tile base class to store scalar variables. @@ -94,6 +98,7 @@ class IScalarTile : public ITile { public: virtual ~IScalarTile() = default; + /** Method to get the scalar variable from a tile as a string * @param[in] row Tile row. If out-of-bound, the row is clamped to the nearest valid edge * @param[in] col Tile column. If out-of-bound, the column is clamped to the nearest valid edge @@ -109,6 +114,7 @@ class IVectorTile : public IScalarTile { public: virtual ~IVectorTile() = default; + /** Method to get the vector variable from a tile. * The user can query the list of supported vector lengths through the supported_vector_lengths() method. * @@ -117,6 +123,7 @@ public: * @return the vector variable as a @ref TileVariable */ virtual TileVariable vector(int32_t row) const = 0; + /** Method to get a sub-vector variable. The length of the sub-vector must be supported by the derived IVectorTile class * * @param[in] row Tile row. If out-of-bound, the row is clamped to the nearest valid edge @@ -126,6 +133,7 @@ public: * @return the vector variable as a @ref TileVariable */ virtual TileVariable vector(int32_t row, int32_t col_start, int32_t width) const = 0; + /** Method to get the supported vector length. 
* * @return a vector containing the supported vector lengths diff --git a/compute_kernel_writer/src/TensorUtils.h b/compute_kernel_writer/src/TensorUtils.h index 4be0395435..84eca084bb 100644 --- a/compute_kernel_writer/src/TensorUtils.h +++ b/compute_kernel_writer/src/TensorUtils.h @@ -52,5 +52,5 @@ TensorComponent get_tensor_dimension(TensorDataLayout layout, TensorDataLayoutCo * @return the @ref TensorComponent */ TensorComponent get_tensor_stride(TensorDataLayout layout, TensorDataLayoutComponent component); -} +} // namespace ckw #endif /* COMPUTE_KERNEL_WRITER_SRC_TENSORUTILS_H */ diff --git a/compute_kernel_writer/src/TileInfo.cpp b/compute_kernel_writer/src/TileInfo.cpp index 7d8b2654ef..66d8cb1620 100644 --- a/compute_kernel_writer/src/TileInfo.cpp +++ b/compute_kernel_writer/src/TileInfo.cpp @@ -27,17 +27,17 @@ namespace ckw { TileInfo::TileInfo(DataType dt) - : _dt(dt), _shape({{1, 1}}) + : _dt(dt), _shape({ { 1, 1 } }) { } TileInfo::TileInfo(DataType dt, int32_t w) - : _dt(dt), _shape({{w, 1}}) + : _dt(dt), _shape({ { w, 1 } }) { } TileInfo::TileInfo(DataType dt, int32_t h, int32_t w) - : _dt(dt), _shape({{w, h}}) + : _dt(dt), _shape({ { w, h } }) { } diff --git a/compute_kernel_writer/src/cl/CLConstantTile.cpp b/compute_kernel_writer/src/cl/CLConstantTile.cpp index 1477a683e6..e2acffb99b 100644 --- a/compute_kernel_writer/src/cl/CLConstantTile.cpp +++ b/compute_kernel_writer/src/cl/CLConstantTile.cpp @@ -25,8 +25,8 @@ #include "ckw/TileInfo.h" #include "src/Helpers.h" -#include "src/cl/CLHelpers.h" #include "src/cl/CLConstantTile.h" +#include "src/cl/CLHelpers.h" namespace ckw { diff --git a/compute_kernel_writer/src/cl/CLConstantTile.h b/compute_kernel_writer/src/cl/CLConstantTile.h index ebd0f04659..c8318487e6 100644 --- a/compute_kernel_writer/src/cl/CLConstantTile.h +++ b/compute_kernel_writer/src/cl/CLConstantTile.h @@ -48,13 +48,17 @@ public: // Inherited method overridden TileVariable scalar(int32_t row, int32_t col) const override; + TileVariable vector(int32_t row) const override; + TileVariable vector(int32_t row, int32_t col_start, int32_t width) const override; + std::vector all() const override; + bool is_assignable() const override; private: - TileContainer _vals {}; + TileContainer _vals{}; }; } // namespace ckw diff --git a/compute_kernel_writer/src/cl/CLTile.cpp b/compute_kernel_writer/src/cl/CLTile.cpp index bc544ecedf..cb0b22a23b 100644 --- a/compute_kernel_writer/src/cl/CLTile.cpp +++ b/compute_kernel_writer/src/cl/CLTile.cpp @@ -68,7 +68,7 @@ TileVariable CLTile::vector(int32_t row) const row = clamp(row, static_cast(0), _info.height() - 1); TileVariable t; - t.str = create_var_name(row); + t.str = create_var_name(row); t.desc.dt = _info.data_type(); t.desc.len = _info.width(); return t; @@ -104,7 +104,7 @@ std::vector CLTile::all() const for(int32_t y = 0; y < _info.height(); ++y) { TileVariable t; - t.str = create_var_name(y); + t.str = create_var_name(y); t.desc.dt = _info.data_type(); t.desc.len = _info.width(); vars.push_back(t); @@ -125,7 +125,6 @@ std::string CLTile::create_var_name(int32_t row) const if(_info.height() == 1) { return var_name; - } else { diff --git a/compute_kernel_writer/src/cl/CLTile.h b/compute_kernel_writer/src/cl/CLTile.h index 285f0b6e58..039bd5613f 100644 --- a/compute_kernel_writer/src/cl/CLTile.h +++ b/compute_kernel_writer/src/cl/CLTile.h @@ -44,13 +44,17 @@ public: * @param[in] name Tile name * @param[in] info Tile info */ - CLTile(const std::string& name, const TileInfo &info); + CLTile(const std::string &name, const 
TileInfo &info); // Inherited method overridden TileVariable scalar(int32_t row, int32_t col) const override; + TileVariable vector(int32_t row) const override; + TileVariable vector(int32_t row, int32_t col_start, int32_t width) const override; + std::vector all() const override; + bool is_assignable() const override; private: diff --git a/compute_kernel_writer/src/cl/ICLTile.cpp b/compute_kernel_writer/src/cl/ICLTile.cpp index f9c8827ef4..38418b5c2a 100644 --- a/compute_kernel_writer/src/cl/ICLTile.cpp +++ b/compute_kernel_writer/src/cl/ICLTile.cpp @@ -33,7 +33,7 @@ namespace ckw { std::vector ICLTile::supported_vector_lengths() const { - return std::vector {1, 2, 3, 4, 8, 16}; + return std::vector{ 1, 2, 3, 4, 8, 16 }; } void ICLTile::validate_tile_info(const TileInfo &info) const diff --git a/compute_kernel_writer/validation/Validation.cpp b/compute_kernel_writer/validation/Validation.cpp index 0a288837d8..16c0d7696d 100644 --- a/compute_kernel_writer/validation/Validation.cpp +++ b/compute_kernel_writer/validation/Validation.cpp @@ -36,7 +36,7 @@ using namespace ckw; */ int32_t main() { - std::vector tests; + std::vector tests; // Add your test here const auto test0 = std::make_unique(); @@ -47,13 +47,13 @@ int32_t main() tests.push_back(test2.get()); #ifdef COMPUTE_KERNEL_WRITER_OPENCL_ENABLED - const auto test3 = std::make_unique(); - const auto test4 = std::make_unique(); - const auto test5 = std::make_unique(); - const auto test6 = std::make_unique(); - const auto test7 = std::make_unique(); - const auto test8 = std::make_unique(); - const auto test9 = std::make_unique(); + const auto test3 = std::make_unique(); + const auto test4 = std::make_unique(); + const auto test5 = std::make_unique(); + const auto test6 = std::make_unique(); + const auto test7 = std::make_unique(); + const auto test8 = std::make_unique(); + const auto test9 = std::make_unique(); const auto test10 = std::make_unique(); const auto test11 = std::make_unique(); const auto test12 = std::make_unique(); diff --git a/compute_kernel_writer/validation/tests/CLConstantTileTest.hpp b/compute_kernel_writer/validation/tests/CLConstantTileTest.hpp index 33942c707d..23a75c4ca9 100644 --- a/compute_kernel_writer/validation/tests/CLConstantTileTest.hpp +++ b/compute_kernel_writer/validation/tests/CLConstantTileTest.hpp @@ -25,10 +25,10 @@ #ifndef COMPUTE_KERNEL_WRITER_TESTS_CLCONSTANTTILETEST_HPP #define COMPUTE_KERNEL_WRITER_TESTS_CLCONSTANTTILETEST_HPP +#include "common/Common.h" #include "src/Helpers.h" #include "src/cl/CLConstantTile.h" #include "src/cl/CLHelpers.h" -#include "common/Common.h" #include #include @@ -41,9 +41,10 @@ class CLConstantTileInternalValuesTest : public ITest public: CLConstantTileInternalValuesTest() { - _values.push_back({{"1.2", "3.5"}, {"4.2", "1.3"}}); - _values.push_back({{"1.2"}}); - _values.push_back({{"1.2", "6.9"}}); + _values.push_back({ { "1.2", "3.5" }, + { "4.2", "1.3" } }); + _values.push_back({ { "1.2" } }); + _values.push_back({ { "1.2", "6.9" } }); } bool run() override @@ -55,19 +56,18 @@ public: for(const auto &test : _values) { const CLConstantTile tile(test, DataType::Fp16); - const auto vars = tile.all(); - const int32_t num_vars = vars.size(); - const int32_t width = tile.info().width(); + const auto vars = tile.all(); + const int32_t num_vars = vars.size(); + const int32_t width = tile.info().width(); for(int32_t y = 0; y < num_vars; ++y) { - const int32_t col = y % width; - const int32_t row = y / width; + const int32_t col = y % width; + const int32_t row = y / width; 
const std::string expected_var_name = "((half)(" + test[row][col] + "))"; - const std::string actual_var_name = vars[y].str; + const std::string actual_var_name = vars[y].str; VALIDATE_TEST(actual_var_name.compare(expected_var_name) == 0, all_tests_passed, test_idx++); } - } return all_tests_passed; } @@ -78,7 +78,7 @@ public: } private: - std::vector _values {}; + std::vector _values{}; }; class CLConstantTileAccessScalarVariableBroadcastXTest : public ITest @@ -113,8 +113,8 @@ public: const size_t num_coords = _x_coord.size(); - std::random_device rd; - std::mt19937 gen(rd()); + std::random_device rd; + std::mt19937 gen(rd()); std::uniform_real_distribution<> dist(-1, 1); int32_t test_idx = 0; @@ -140,7 +140,7 @@ public: const TileVariable var = tile.scalar(y_coord, x_coord); - const std::string actual_var_name = var.str; + const std::string actual_var_name = var.str; const std::string expected_var_name = "((half)(" + container[y_coord][x_coord_clamped] + "))"; VALIDATE_TEST(actual_var_name.compare(expected_var_name) == 0, all_tests_passed, test_idx++); @@ -154,9 +154,9 @@ public: } private: - std::vector _width {}; - std::vector _x_coord {}; - std::vector _y_coord {}; + std::vector _width{}; + std::vector _x_coord{}; + std::vector _y_coord{}; }; class CLConstantTileAccessScalarVariableBroadcastYTest : public ITest @@ -189,8 +189,8 @@ public: // The status of this variable can change in VALIDATE_TEST() bool all_tests_passed = true; - std::random_device rd; - std::mt19937 gen(rd()); + std::random_device rd; + std::mt19937 gen(rd()); std::uniform_real_distribution<> dist(-1, 1); const size_t num_coords = _x_coord.size(); @@ -218,7 +218,7 @@ public: const TileVariable var = tile.scalar(y_coord, x_coord); - const std::string actual_var_name = var.str; + const std::string actual_var_name = var.str; const std::string expected_var_name = "((half)(" + container[y_coord_clamped][x_coord] + "))"; VALIDATE_TEST(actual_var_name.compare(expected_var_name) == 0, all_tests_passed, test_idx++); @@ -232,9 +232,9 @@ public: } private: - std::vector _height {}; - std::vector _x_coord {}; - std::vector _y_coord {}; + std::vector _height{}; + std::vector _x_coord{}; + std::vector _y_coord{}; }; class CLConstantTileAccessVectorVariablesTest : public ITest @@ -244,10 +244,11 @@ public: CLConstantTileAccessVectorVariablesTest() { - _values.push_back({{"1.2", "3.5"}, {"4.2", "1.3"}}); - _values.push_back({{"1.2"}}); + _values.push_back({ { "1.2", "3.5" }, + { "4.2", "1.3" } }); + _values.push_back({ { "1.2" } }); // Mix variable names and values - _values.push_back({{"1.2", "acc", "8.7", "9.3", "ratio", "2.9", "1.7", "0.3"}}); + _values.push_back({ { "1.2", "acc", "8.7", "9.3", "ratio", "2.9", "1.7", "0.3" } }); } bool run() override @@ -260,8 +261,8 @@ public: for(const auto &test : _values) { const CLConstantTile tile(test, dt); - const int32_t width = tile.info().width(); - const int32_t height = tile.info().height(); + const int32_t width = tile.info().width(); + const int32_t height = tile.info().height(); for(int32_t row = 0; row < height; ++row) { @@ -292,7 +293,7 @@ public: } private: - std::vector _values {}; + std::vector _values{}; }; class CLConstantTileAccessSubVectorVariablesTest : public ITest @@ -302,7 +303,7 @@ public: CLConstantTileAccessSubVectorVariablesTest() { - _values.push_back({{"1.2", "acc", "8.7", "9.3", "ratio", "2.9", "1.7", "0.3"}}); + _values.push_back({ { "1.2", "acc", "8.7", "9.3", "ratio", "2.9", "1.7", "0.3" } }); _subwidths.push_back(1); _subwidths.push_back(2); 
_subwidths.push_back(3); @@ -326,7 +327,7 @@ public: for(auto &subwidth : _subwidths) { const CLConstantTile tile(test, dt); - const int32_t height = tile.info().height(); + const int32_t height = tile.info().height(); for(int32_t row = 0; row < height; ++row) { @@ -345,7 +346,8 @@ public: expected_var_name += "))"; const std::string actual_var_name = tile.vector(row, col_start, subwidth).str; - VALIDATE_TEST(actual_var_name.compare(expected_var_name) == 0, all_tests_passed, test_idx++); + VALIDATE_TEST(actual_var_name.compare(expected_var_name) == 0, all_tests_passed, + test_idx++); } } } @@ -359,9 +361,9 @@ public: } private: - std::vector _values {}; - std::vector _subwidths {}; - std::vector _offsets {}; + std::vector _values{}; + std::vector _subwidths{}; + std::vector _offsets{}; }; } // namespace ckw diff --git a/compute_kernel_writer/validation/tests/CLTileTest.hpp b/compute_kernel_writer/validation/tests/CLTileTest.hpp index 7847f0e128..ecfe811267 100644 --- a/compute_kernel_writer/validation/tests/CLTileTest.hpp +++ b/compute_kernel_writer/validation/tests/CLTileTest.hpp @@ -25,9 +25,9 @@ #ifndef COMPUTE_KERNEL_WRITER_TESTS_CLTILETEST_HPP #define COMPUTE_KERNEL_WRITER_TESTS_CLTILETEST_HPP +#include "common/Common.h" #include "src/Helpers.h" #include "src/cl/CLTile.h" -#include "common/Common.h" #include #include @@ -37,9 +37,9 @@ namespace ckw class CLTileInternalVariableNamesTest : public ITest { public: - const int32_t width = 4; - const int32_t height = 4; - const DataType dt = DataType::Fp32; + const int32_t width = 4; + const int32_t height = 4; + const DataType dt = DataType::Fp32; CLTileInternalVariableNamesTest() { @@ -59,12 +59,12 @@ public: for(const auto &tile_name : _tile_name) { const CLTile tile(tile_name, info); - const auto vars = tile.all(); + const auto vars = tile.all(); for(int32_t y = 0; y < height; ++y) { const std::string expected_var_name = tile_name + "_" + std::to_string(y); - const std::string actual_var_name = vars[y].str; + const std::string actual_var_name = vars[y].str; VALIDATE_TEST(actual_var_name.compare(expected_var_name) == 0, all_tests_passed, test_idx++); } } @@ -77,7 +77,7 @@ public: } private: - std::vector _tile_name {}; + std::vector _tile_name{}; }; class CLTileInternalNumVariablesTest : public ITest @@ -106,12 +106,12 @@ public: int32_t test_idx = 0; for(size_t i = 0; i < num_dims; ++i) { - const int32_t width = _width[i]; - const int32_t height = _height[i]; + const int32_t width = _width[i]; + const int32_t height = _height[i]; const TileInfo info(DataType::Fp32, height, width); - const CLTile tile("src", info); - const auto vars = tile.all(); - const int32_t num_vars = vars.size(); + const CLTile tile("src", info); + const auto vars = tile.all(); + const int32_t num_vars = vars.size(); // We expect the number of variables to match the heigth of the tile VALIDATE_TEST(num_vars == height, all_tests_passed, test_idx++); @@ -125,8 +125,8 @@ public: } private: - std::vector _width {}; - std::vector _height {}; + std::vector _width{}; + std::vector _height{}; }; class CLTileAccessScalarVariableTest : public ITest @@ -153,7 +153,7 @@ public: bool run() override { const TileInfo info(dt, height, width); - const CLTile tile(tile_name, info); + const CLTile tile(tile_name, info); VALIDATE_ON_MSG(_x_coord.size() == _y_coord.size(), "The number of x-coords and y-coords does not match"); @@ -170,8 +170,8 @@ public: const TileVariable var = tile.scalar(y_coord, x_coord); - const std::string actual_var_name = var.str; - std::string 
expected_var_name = tile_name; + const std::string actual_var_name = var.str; + std::string expected_var_name = tile_name; expected_var_name += "_" + std::to_string(y_coord); expected_var_name += ".s" + dec_to_hex_as_string(x_coord); @@ -186,8 +186,8 @@ public: } private: - std::vector _x_coord {}; - std::vector _y_coord {}; + std::vector _x_coord{}; + std::vector _y_coord{}; }; class CLTileAccessScalarVariableBroadcastXTest : public ITest @@ -232,12 +232,12 @@ public: const int32_t x_coord_clamped = clamp(x_coord, static_cast(0), width - 1); const TileInfo info(dt, height, width); - const CLTile tile(tile_name, info); + const CLTile tile(tile_name, info); const TileVariable var = tile.scalar(y_coord, x_coord); - const std::string actual_var_name = var.str; - std::string expected_var_name = tile_name; + const std::string actual_var_name = var.str; + std::string expected_var_name = tile_name; expected_var_name += "_" + std::to_string(y_coord); if(width != 1) { @@ -255,9 +255,9 @@ public: } private: - std::vector _width {}; - std::vector _x_coord {}; - std::vector _y_coord {}; + std::vector _width{}; + std::vector _x_coord{}; + std::vector _y_coord{}; }; class CLTileAccessScalarVariableBroadcastYTest : public ITest @@ -302,12 +302,12 @@ public: const int32_t y_coord_clamped = clamp(y_coord, static_cast(0), height - 1); const TileInfo info(dt, height, width); - const CLTile tile(tile_name, info); + const CLTile tile(tile_name, info); const TileVariable var = tile.scalar(y_coord, x_coord); - const std::string actual_var_name = var.str; - std::string expected_var_name = tile_name; + const std::string actual_var_name = var.str; + std::string expected_var_name = tile_name; if(height != 1) { expected_var_name += "_" + std::to_string(y_coord_clamped); @@ -329,9 +329,9 @@ public: } private: - std::vector _height {}; - std::vector _x_coord {}; - std::vector _y_coord {}; + std::vector _height{}; + std::vector _x_coord{}; + std::vector _y_coord{}; }; class CLTileAccessVectorVariablesTest : public ITest @@ -357,14 +357,14 @@ public: for(const auto &height : _heights) { const TileInfo info(dt, height, width); - const CLTile tile(tile_name, info); + const CLTile tile(tile_name, info); for(int32_t row = 0; row < height; ++row) { const TileVariable var = tile.vector(row); - const std::string actual_var_name = var.str; - std::string expected_var_name = tile_name; + const std::string actual_var_name = var.str; + std::string expected_var_name = tile_name; if(height != 1) { expected_var_name += "_" + std::to_string(row); @@ -382,7 +382,7 @@ public: } private: - std::vector _heights {}; + std::vector _heights{}; }; class CLTileAccessSubVectorVariablesTest : public ITest @@ -416,7 +416,7 @@ public: for(const auto &subwidth : _subwidths) { const TileInfo info(dt, height, width); - const CLTile tile(tile_name, info); + const CLTile tile(tile_name, info); for(int32_t row = 0; row < height; ++row) { @@ -459,8 +459,8 @@ public: } private: - std::vector _subwidths {}; - std::vector _offsets {}; + std::vector _subwidths{}; + std::vector _offsets{}; }; } // namespace ckw diff --git a/compute_kernel_writer/validation/tests/TensorBitMaskTest.hpp b/compute_kernel_writer/validation/tests/TensorBitMaskTest.hpp index a1a3588394..1e7d003879 100644 --- a/compute_kernel_writer/validation/tests/TensorBitMaskTest.hpp +++ b/compute_kernel_writer/validation/tests/TensorBitMaskTest.hpp @@ -72,13 +72,14 @@ public: // The status of this variable can change in VALIDATE_TEST() bool all_tests_passed = true; - 
VALIDATE_ON_MSG(_component.size() == _bitmask.size(), "The number of layouts and components does not match"); + VALIDATE_ON_MSG(_component.size() == _bitmask.size(), + "The number of layouts and components does not match"); const size_t num_tests = _component.size(); for(size_t i = 0; i < num_tests; ++i) { const TensorComponent component = _component[i]; const TensorComponentBitmask bitmask = _bitmask[i]; - const bool out = static_cast(component) & static_cast(bitmask); + const bool out = static_cast(component) & static_cast(bitmask); VALIDATE_TEST(out == true, all_tests_passed, i); } return all_tests_passed; @@ -90,8 +91,8 @@ public: } private: - std::vector _component {}; - std::vector _bitmask {}; + std::vector _component{}; + std::vector _bitmask{}; }; class TensorBitMaskFalseTest : public ITest @@ -191,13 +192,14 @@ public: // The status of this variable can change in VALIDATE_TEST() bool all_tests_passed = true; - VALIDATE_ON_MSG(_component.size() == _bitmask.size(), "The number of layouts and components does not match"); + VALIDATE_ON_MSG(_component.size() == _bitmask.size(), + "The number of layouts and components does not match"); const size_t num_tests = _component.size(); for(size_t i = 0; i < num_tests; ++i) { const TensorComponent component = _component[i]; const TensorComponentBitmask bitmask = _bitmask[i]; - const bool out = static_cast(component) & static_cast(bitmask); + const bool out = static_cast(component) & static_cast(bitmask); VALIDATE_TEST(out == false, all_tests_passed, i); } return all_tests_passed; @@ -209,9 +211,9 @@ public: } private: - std::vector _component {}; - std::vector _bitmask {}; + std::vector _component{}; + std::vector _bitmask{}; }; -} +} // namespace ckw #endif /* COMPUTE_KERNEL_WRITER_TESTS_TENSORBITMASK_HPP */ diff --git a/compute_kernel_writer/validation/tests/UtilsTest.hpp b/compute_kernel_writer/validation/tests/UtilsTest.hpp index 4a09d53f73..db1c8fd4ae 100644 --- a/compute_kernel_writer/validation/tests/UtilsTest.hpp +++ b/compute_kernel_writer/validation/tests/UtilsTest.hpp @@ -25,8 +25,8 @@ #define COMPUTE_KERNEL_WRITER_TESTS_UTILSTEST_HPP #include "ckw/TensorInfo.h" -#include "src/TensorUtils.h" #include "common/Common.h" +#include "src/TensorUtils.h" #include @@ -74,14 +74,15 @@ public: bool all_tests_passed = true; VALIDATE_ON_MSG(_layout.size() == _component.size(), "The number of layouts and components does not match"); - VALIDATE_ON_MSG(_layout.size() == _expected.size(), "The number of layouts and expected outputs does not match"); + VALIDATE_ON_MSG(_layout.size() == _expected.size(), + "The number of layouts and expected outputs does not match"); const size_t num_tests = _layout.size(); for(size_t i = 0; i < num_tests; ++i) { const TensorDataLayout layout = _layout[i]; const TensorDataLayoutComponent component = _component[i]; const TensorComponent expected = _expected[i]; - const TensorComponent out = get_tensor_dimension(layout, component); + const TensorComponent out = get_tensor_dimension(layout, component); VALIDATE_TEST(out == expected, all_tests_passed, i); } return all_tests_passed; @@ -93,10 +94,10 @@ public: } private: - std::vector _layout {}; - std::vector _component {}; - std::vector _expected {}; + std::vector _layout{}; + std::vector _component{}; + std::vector _expected{}; }; -} +} // namespace ckw #endif /* COMPUTE_KERNEL_WRITER_TESTS_UTILSTEST_HPP */ diff --git a/compute_kernel_writer/validation/tests/common/Common.h b/compute_kernel_writer/validation/tests/common/Common.h index d33d7f6688..8573c42b88 100644 --- 
a/compute_kernel_writer/validation/tests/common/Common.h +++ b/compute_kernel_writer/validation/tests/common/Common.h @@ -32,32 +32,34 @@ namespace ckw { #define VALIDATE_ON_MSG(exp, msg) assert(((void)msg, exp)) -#define VALIDATE_TEST(exp, all_tests_passed, id_test) \ - do \ - { \ - if((exp) == true) \ - { \ - all_tests_passed &= true; \ +#define VALIDATE_TEST(exp, all_tests_passed, id_test) \ + do \ + { \ + if((exp) == true) \ + { \ + all_tests_passed &= true; \ const std::string msg = "TEST " + std::to_string((id_test)) + ": [PASSED]"; \ - std::cout << msg << std::endl; \ - } \ - else \ - { \ - all_tests_passed &= false; \ + std::cout << msg << std::endl; \ + } \ + else \ + { \ + all_tests_passed &= false; \ const std::string msg = "TEST " + std::to_string((id_test)) + ": [FAILED]"; \ - std::cout << msg << std::endl; \ - } \ + std::cout << msg << std::endl; \ + } \ } while(false) class ITest { public: virtual ~ITest() = default; + /** Method to run the test * * @return it returns true if all tests passed */ virtual bool run() = 0; + /** Name of the test * * @return it returns the name of the test -- cgit v1.2.1
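
For readers skimming the patch, the `ITest` interface and the reformatted `VALIDATE_TEST` macro in `common/Common.h` are the scaffolding every validation test touched above is built on. The sketch below is purely illustrative and is not part of this change: the `ExampleAlwaysPassTest` class, the checked expressions, and the `main()` runner are assumptions (including the assumed `std::string name()` signature hinted at by the "Name of the test" doxygen comment); it only shows the shape that tests such as `CLTileInternalVariableNamesTest` follow — implement `run()`, fold every check into `all_tests_passed` with `VALIDATE_TEST`, and return the aggregate result.

```cpp
// Illustrative sketch only -- not part of this patch.
#include <cstdint>
#include <iostream>
#include <string>

#include "common/Common.h" // provides ckw::ITest, VALIDATE_TEST, VALIDATE_ON_MSG

namespace ckw
{
// Hypothetical test: the class name and the checked expressions are assumptions
// used to illustrate the ITest / VALIDATE_TEST pattern seen in the tests above.
class ExampleAlwaysPassTest : public ITest
{
public:
    bool run() override
    {
        // The status of this variable can change in VALIDATE_TEST()
        bool    all_tests_passed = true;
        int32_t test_idx         = 0;

        // Each check prints "TEST <id>: [PASSED]" or "TEST <id>: [FAILED]"
        // and ANDs the outcome into all_tests_passed.
        VALIDATE_TEST(1 + 1 == 2, all_tests_passed, test_idx++);
        VALIDATE_TEST(std::string("ckw").size() == 3, all_tests_passed, test_idx++);

        return all_tests_passed;
    }

    // Assumed signature, matching the "Name of the test" accessor documented above.
    std::string name() override
    {
        return "ExampleAlwaysPassTest";
    }
};
} // namespace ckw

// Minimal runner, analogous in spirit to validation/Validation.cpp.
int main()
{
    ckw::ExampleAlwaysPassTest test;
    const bool                 passed = test.run();
    std::cout << test.name() << ": " << (passed ? "OK" : "FAILED") << std::endl;
    return passed ? 0 : 1;
}
```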