From 856f66e6c61b77d03f754cd0fa8439891f0e4aca Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Thu, 22 Apr 2021 21:13:21 +0100 Subject: Port CLGEMM to memory injecting interface Moves the following kernels: - CLGEMMMatrixMultiplyKernel - CLGEMMMatrixMultiplyNativeKernel - CLGEMMMatrixMultiplyReshapedKernel - CLGEMMMatrixMultiplyReshapedOnlyRHSKernel Moves the following functions: - CLGEMM Introduces facilities to ease handling of auxiliary temporary buffers under the new run interface. These are: - CLAuxTensorHandler: That allows wrapping of workspace buffer memory into CLBuffer objects - Ability to inject TensorInfo to allocator without transferring ownership. This reduces the copy overhead if needed. Resolves: COMPMID-4188 Signed-off-by: Georgios Pinitas Change-Id: I7055435d831b05b749b26302082e4ac45f26dfb0 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5498 Tested-by: Arm Jenkins Reviewed-by: Michalis Spyrou Comments-Addressed: Arm Jenkins --- arm_compute/core/ITensorPack.h | 19 ++++++++++++------- arm_compute/core/Types.h | 8 ++++---- arm_compute/core/experimental/Types.h | 30 +++++++++++++++++++++++++----- 3 files changed, 41 insertions(+), 16 deletions(-) (limited to 'arm_compute/core') diff --git a/arm_compute/core/ITensorPack.h b/arm_compute/core/ITensorPack.h index 8aea880bb6..2f41d4d51e 100644 --- a/arm_compute/core/ITensorPack.h +++ b/arm_compute/core/ITensorPack.h @@ -24,9 +24,11 @@ #ifndef ARM_COMPUTE_ITENSORPACK_H #define ARM_COMPUTE_ITENSORPACK_H +#include "arm_compute/core/experimental/Types.h" + #include #include -#include +#include namespace arm_compute { @@ -36,19 +38,20 @@ class ITensor; /** Tensor packing service */ class ITensorPack { -private: +public: struct PackElement { PackElement() = default; - PackElement(ITensor *tensor) - : tensor(tensor), ctensor(nullptr) + PackElement(int id, ITensor *tensor) + : id(id), tensor(tensor), ctensor(nullptr) { } - PackElement(const ITensor *ctensor) - : tensor(nullptr), 
ctensor(ctensor) + PackElement(int id, const ITensor *ctensor) + : id(id), tensor(nullptr), ctensor(ctensor) { } + int id{ -1 }; ITensor *tensor{ nullptr }; const ITensor *ctensor{ nullptr }; }; @@ -56,6 +59,8 @@ private: public: /** Default Constructor */ ITensorPack() = default; + /** Initializer list Constructor */ + ITensorPack(std::initializer_list l); /** Add tensor to the pack * * @param[in] id ID/type of the tensor to add @@ -102,7 +107,7 @@ public: bool empty() const; private: - std::map _pack{}; /**< Container with the packed tensors */ + std::unordered_map _pack{}; /**< Container with the packed tensors */ }; } // namespace arm_compute #endif /*ARM_COMPUTE_ITENSORPACK_H */ diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h index 9e054f26dd..ec9c419dbc 100644 --- a/arm_compute/core/Types.h +++ b/arm_compute/core/Types.h @@ -1753,11 +1753,11 @@ private: /** GEMM reshape information class. This class stores the necessary information about matrix A and matrix B reshape. 
* - * The matrix A can only be reshaped through @ref CLGEMMReshapeLHSMatrixKernel or @ref NEGEMMInterleave4x4Kernel - * Note: Optionally just for @ref CLGEMMReshapeLHSMatrixKernel is it possible to set mult_interleave4x4_height, the multiplication factor for the height of the 4x4 interleaved block + * The matrix A can only be reshaped through @ref opencl::kernels::ClGemmReshapeLhsMatrixKernel or @ref NEGEMMInterleave4x4Kernel + * Note: Optionally just for @ref opencl::kernels::ClGemmReshapeLhsMatrixKernel is it possible to set mult_interleave4x4_height, the multiplication factor for the height of the 4x4 interleaved block * - * The matrix B can only be reshaped through @ref CLGEMMReshapeRHSMatrixKernel or @ref NEGEMMTranspose1xWKernel - * Note: Optionally just for @ref CLGEMMReshapeRHSMatrixKernel is it possible to set mult_transpose1xW_width, the multiplication factor for the width of the 1xW transposed block + * The matrix B can only be reshaped through @ref opencl::kernels::ClGemmReshapeRhsMatrixKernel or @ref NEGEMMTranspose1xWKernel + * Note: Optionally just for @ref opencl::kernels::ClGemmReshapeRhsMatrixKernel is it possible to set mult_transpose1xW_width, the multiplication factor for the width of the 1xW transposed block * */ class GEMMReshapeInfo final diff --git a/arm_compute/core/experimental/Types.h b/arm_compute/core/experimental/Types.h index 7ddb930421..92ece460dc 100644 --- a/arm_compute/core/experimental/Types.h +++ b/arm_compute/core/experimental/Types.h @@ -47,6 +47,7 @@ enum TensorType : int32_t ACL_DST_0 = 30, ACL_DST_1 = 31, ACL_DST_2 = 32, + ACL_BIAS = ACL_SRC_2, ACL_INT = 50, ACL_INT_0 = 50, ACL_INT_1 = 51, @@ -54,21 +55,40 @@ enum TensorType : int32_t ACL_INT_3 = 53, ACL_INT_4 = 54, ACL_SRC_VEC = 256, + ACL_DST_VEC = 512, + ACL_INT_VEC = 1024 }; namespace experimental { +enum class MemoryLifetime +{ + Temporary = 0, + Persistent = 1, + Prepare = 2, +}; struct MemoryInfo { - MemoryInfo(TensorType type, size_t size, size_t alignment) 
noexcept - : type(type), + MemoryInfo() = default; + + MemoryInfo(int slot, size_t size, size_t alignment = 0) noexcept + : slot(slot), + size(size), + alignment(alignment) + { + } + + MemoryInfo(int slot, MemoryLifetime lifetime, size_t size, size_t alignment = 0) noexcept + : slot(slot), + lifetime(lifetime), size(size), alignment(alignment) { } - TensorType type; - size_t size; - size_t alignment; + int slot{ ACL_UNKNOWN }; + MemoryLifetime lifetime{ MemoryLifetime::Temporary }; + size_t size{ 0 }; + size_t alignment{ 64 }; }; using MemoryRequirements = std::vector; -- cgit v1.2.1