aboutsummaryrefslogtreecommitdiff
path: root/arm_compute
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2021-04-22 21:13:21 +0100
committerGeorgios Pinitas <georgios.pinitas@arm.com>2021-05-18 14:48:39 +0000
commit856f66e6c61b77d03f754cd0fa8439891f0e4aca (patch)
treef9379cd0853ac407109e54c3d53b385ceee066c2 /arm_compute
parent37f4b2ef1ea225a90ccb563fcb2c08f8fb0fb5d5 (diff)
downloadComputeLibrary-856f66e6c61b77d03f754cd0fa8439891f0e4aca.tar.gz
Port CLGEMM to memory injecting interface
Moves the following kernels: - CLGEMMMatrixMultiplyKernel - CLGEMMMatrixMultiplyNativeKernel - CLGEMMMatrixMultiplyReshapedKernel - CLGEMMMatrixMultiplyReshapedOnlyRHSKernel Moves the following functions - CLGEMM Introduces facilities for easy handling of auxiliary temporary buffers under the new run interface. Such are: - CLAuxTensorHandler: That allows wrapping of workspace buffers memory to CLBuffer objects - Ability to inject TensorInfo to allocator without transferring ownership. This reduces the copy overhead if needed. Resolves: COMPMID-4188 Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com> Change-Id: I7055435d831b05b749b26302082e4ac45f26dfb0 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5498 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r--arm_compute/core/ITensorPack.h19
-rw-r--r--arm_compute/core/Types.h8
-rw-r--r--arm_compute/core/experimental/Types.h30
-rw-r--r--arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h2
-rw-r--r--arm_compute/runtime/CL/functions/CLGEMM.h145
-rw-r--r--arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h10
-rw-r--r--arm_compute/runtime/CL/functions/CLLogicalAnd.h62
-rw-r--r--arm_compute/runtime/CL/functions/CLLogicalOr.h62
-rw-r--r--arm_compute/runtime/CL/functions/CLSlice.h78
-rw-r--r--arm_compute/runtime/ITensorAllocator.h17
-rw-r--r--arm_compute/runtime/NEON/functions/NESlice.h70
-rw-r--r--arm_compute/runtime/NEON/functions/NEStridedSlice.h70
12 files changed, 260 insertions, 313 deletions
diff --git a/arm_compute/core/ITensorPack.h b/arm_compute/core/ITensorPack.h
index 8aea880bb6..2f41d4d51e 100644
--- a/arm_compute/core/ITensorPack.h
+++ b/arm_compute/core/ITensorPack.h
@@ -24,9 +24,11 @@
#ifndef ARM_COMPUTE_ITENSORPACK_H
#define ARM_COMPUTE_ITENSORPACK_H
+#include "arm_compute/core/experimental/Types.h"
+
#include <cstddef>
#include <cstdint>
-#include <map>
+#include <unordered_map>
namespace arm_compute
{
@@ -36,19 +38,20 @@ class ITensor;
/** Tensor packing service */
class ITensorPack
{
-private:
+public:
struct PackElement
{
PackElement() = default;
- PackElement(ITensor *tensor)
- : tensor(tensor), ctensor(nullptr)
+ PackElement(int id, ITensor *tensor)
+ : id(id), tensor(tensor), ctensor(nullptr)
{
}
- PackElement(const ITensor *ctensor)
- : tensor(nullptr), ctensor(ctensor)
+ PackElement(int id, const ITensor *ctensor)
+ : id(id), tensor(nullptr), ctensor(ctensor)
{
}
+ int id{ -1 };
ITensor *tensor{ nullptr };
const ITensor *ctensor{ nullptr };
};
@@ -56,6 +59,8 @@ private:
public:
/** Default Constructor */
ITensorPack() = default;
+ /** Initializer list Constructor */
+ ITensorPack(std::initializer_list<PackElement> l);
/** Add tensor to the pack
*
* @param[in] id ID/type of the tensor to add
@@ -102,7 +107,7 @@ public:
bool empty() const;
private:
- std::map<unsigned int, PackElement> _pack{}; /**< Container with the packed tensors */
+ std::unordered_map<int, PackElement> _pack{}; /**< Container with the packed tensors */
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_ITENSORPACK_H */
diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h
index 9e054f26dd..ec9c419dbc 100644
--- a/arm_compute/core/Types.h
+++ b/arm_compute/core/Types.h
@@ -1753,11 +1753,11 @@ private:
/** GEMM reshape information class. This class stores the necessary information about matrix A and matrix B reshape.
*
- * The matrix A can only be reshaped through @ref CLGEMMReshapeLHSMatrixKernel or @ref NEGEMMInterleave4x4Kernel
- * Note: Optionally just for @ref CLGEMMReshapeLHSMatrixKernel is it possible to set mult_interleave4x4_height, the multiplication factor for the height of the 4x4 interleaved block
+ * The matrix A can only be reshaped through @ref opencl::kernels::ClGemmReshapeLhsMatrixKernel or @ref NEGEMMInterleave4x4Kernel
+ * Note: Optionally just for @ref opencl::kernels::ClGemmReshapeLhsMatrixKernel is it possible to set mult_interleave4x4_height, the multiplication factor for the height of the 4x4 interleaved block
*
- * The matrix B can only be reshaped through @ref CLGEMMReshapeRHSMatrixKernel or @ref NEGEMMTranspose1xWKernel
- * Note: Optionally just for @ref CLGEMMReshapeRHSMatrixKernel is it possible to set mult_transpose1xW_width, the multiplication factor for the width of the 1xW transposed block
+ * The matrix B can only be reshaped through @ref opencl::kernels::ClGemmReshapeRhsMatrixKernel or @ref NEGEMMTranspose1xWKernel
+ * Note: Optionally just for @ref opencl::kernels::ClGemmReshapeRhsMatrixKernel is it possible to set mult_transpose1xW_width, the multiplication factor for the width of the 1xW transposed block
*
*/
class GEMMReshapeInfo final
diff --git a/arm_compute/core/experimental/Types.h b/arm_compute/core/experimental/Types.h
index 7ddb930421..92ece460dc 100644
--- a/arm_compute/core/experimental/Types.h
+++ b/arm_compute/core/experimental/Types.h
@@ -47,6 +47,7 @@ enum TensorType : int32_t
ACL_DST_0 = 30,
ACL_DST_1 = 31,
ACL_DST_2 = 32,
+ ACL_BIAS = ACL_SRC_2,
ACL_INT = 50,
ACL_INT_0 = 50,
ACL_INT_1 = 51,
@@ -54,21 +55,40 @@ enum TensorType : int32_t
ACL_INT_3 = 53,
ACL_INT_4 = 54,
ACL_SRC_VEC = 256,
+ ACL_DST_VEC = 512,
+ ACL_INT_VEC = 1024
};
namespace experimental
{
+enum class MemoryLifetime
+{
+ Temporary = 0,
+ Persistent = 1,
+ Prepare = 2,
+};
struct MemoryInfo
{
- MemoryInfo(TensorType type, size_t size, size_t alignment) noexcept
- : type(type),
+ MemoryInfo() = default;
+
+ MemoryInfo(int slot, size_t size, size_t alignment = 0) noexcept
+ : slot(slot),
+ size(size),
+ alignment(alignment)
+ {
+ }
+
+ MemoryInfo(int slot, MemoryLifetime lifetime, size_t size, size_t alignment = 0) noexcept
+ : slot(slot),
+ lifetime(lifetime),
size(size),
alignment(alignment)
{
}
- TensorType type;
- size_t size;
- size_t alignment;
+ int slot{ ACL_UNKNOWN };
+ MemoryLifetime lifetime{ MemoryLifetime::Temporary };
+ size_t size{ 0 };
+ size_t alignment{ 64 };
};
using MemoryRequirements = std::vector<MemoryInfo>;
diff --git a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
index eec01bcebe..075c5d1f45 100644
--- a/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
+++ b/arm_compute/runtime/CL/functions/CLFullyConnectedLayer.h
@@ -98,7 +98,7 @@ private:
*
* -# @ref CLIm2ColKernel (called when the input comes from a convolutional layer)
* -# @ref CLTranspose (if @p are_weights_reshaped is set to false and transpose_weights is set to true ) (called once)
- * -# @ref CLGEMMMatrixMultiplyKernel or @ref CLGEMMLowpMatrixMultiplyCore (if quantized asymmetric)
+ * -# @ref opencl::kernels::ClGemmMatrixMultiplyKernel or @ref CLGEMMLowpMatrixMultiplyCore (if quantized asymmetric)
*
* @note The fully connected layer accepts "weights" tensors only with 2 dimensions.
*/
diff --git a/arm_compute/runtime/CL/functions/CLGEMM.h b/arm_compute/runtime/CL/functions/CLGEMM.h
index 1e2ae7be64..38a07ef9fb 100644
--- a/arm_compute/runtime/CL/functions/CLGEMM.h
+++ b/arm_compute/runtime/CL/functions/CLGEMM.h
@@ -35,76 +35,12 @@
namespace arm_compute
{
+// Forward declarations
class CLCompileContext;
-class CLGEMMReshapeRHSMatrixKernel;
-class CLGEMMMatrixMultiplyKernel;
-class CLGEMMMatrixMultiplyReshapedKernel;
-class CLGEMMMatrixMultiplyReshapedOnlyRHSKernel;
-class CLGEMMReshapeLHSMatrixKernel;
class ICLTensor;
class ITensorInfo;
-namespace weights_transformations
-{
-/** Basic function to manage the reshape weights generated from @ref CLGEMMReshapeRHSMatrixKernel */
-class CLGEMMReshapeRHSMatrixKernelManaged : public ITransformWeights
-{
-public:
- /** Default constructor */
- CLGEMMReshapeRHSMatrixKernelManaged();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMReshapeRHSMatrixKernelManaged(const CLGEMMReshapeRHSMatrixKernelManaged &) = delete;
- /** Default move constructor */
- CLGEMMReshapeRHSMatrixKernelManaged(CLGEMMReshapeRHSMatrixKernelManaged &&) = default;
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- CLGEMMReshapeRHSMatrixKernelManaged &operator=(const CLGEMMReshapeRHSMatrixKernelManaged &) = delete;
- /** Default move assignment operator */
- CLGEMMReshapeRHSMatrixKernelManaged &operator=(CLGEMMReshapeRHSMatrixKernelManaged &&) = default;
- /** Default desctructor */
- ~CLGEMMReshapeRHSMatrixKernelManaged();
- //Inherited method override
- void run() override;
-
- //Inherited method override
- void release() override;
-
- //Inherited method override
- ICLTensor *get_weights() override;
-
- //Inherited method override
- uint32_t uid() override;
-
- /** Configures the @ref CLGEMMReshapeRHSMatrixKernel kernel
- *
- * @param[in] input Input tensor. Data types supported: All
- * @param[in] info RHS matrix information to be used for reshaping.
- */
- void configure(const ICLTensor *input, GEMMRHSMatrixInfo info);
-
- /** Configures the @ref CLGEMMReshapeRHSMatrixKernel kernel
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Input tensor. Data types supported: All
- * @param[in] info RHS matrix information to be used for reshaping.
- */
- void configure(const CLCompileContext &compile_context, const ICLTensor *input, GEMMRHSMatrixInfo info);
-
-private:
- static constexpr uint32_t _uid{ 0x15 };
- CLTensor _output{};
- std::unique_ptr<CLGEMMReshapeRHSMatrixKernel> _kernel;
-};
-} // namespace weights_transformations
-
-/** Basic function to execute GEMM on OpenCL. This function calls the following OpenCL kernels:
- *
- * -# @ref CLGEMMReshapeLHSMatrixKernel (only if the RESHAPED_V1 is selected by the heuristic model)
- * -# @ref CLGEMMReshapeRHSMatrixKernel (only if either the RESHAPED_V1 or RESHAPED_ONLY_RHS is selected by the select_gemm_kernel method())
- * -# @ref CLGEMMMatrixMultiplyKernel (only if either the NATIVE or RESHAPED_V1 is selected by the select_gemm_kernel method())
- * -# @ref CLGEMMMatrixMultiplyReshapedKernel (only if RESHAPED_V1 is selected by the select_gemm_kernel method())
- * -# @ref CLGEMMMatrixMultiplyReshapedOnlyRHSKernel (only if RESHAPED_ONLY_RHS is selected by the select_gemm_kernel method())
- *
- */
+/** Basic function to execute GEMM on OpenCL */
class CLGEMM : public IFunction
{
public:
@@ -114,16 +50,16 @@ public:
* @param[in] weights_manager (Optional) Weights manager.
*/
CLGEMM(std::shared_ptr<IMemoryManager> memory_manager = nullptr, IWeightsManager *weights_manager = nullptr);
+ /** Default destructor */
+ ~CLGEMM();
/** Prevent instances of this class from being copied (As this class contains pointers) */
CLGEMM(const CLGEMM &) = delete;
/** Default move constructor */
- CLGEMM(CLGEMM &&) = default;
+ CLGEMM(CLGEMM &&);
/** Prevent instances of this class from being copied (As this class contains pointers) */
CLGEMM &operator=(const CLGEMM &) = delete;
/** Default move assignment operator */
- CLGEMM &operator=(CLGEMM &&) = default;
- /** Default destructor */
- ~CLGEMM();
+ CLGEMM &operator=(CLGEMM &&);
/** Initialise the kernel's inputs and output
*
* Valid data layouts:
@@ -141,25 +77,6 @@ public:
*
* @note Whilst the first input tensor can be a vector, the second input tensor must be at least a matrix
*
- * @param[in] a First input tensor (Matrix or Vector A). Data types supported: F16/F32
- * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a.
- * @param[in] c Third input tensor (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a.
- * @param[out] output Output tensor. Data type supported: same as @p a
- * @param[in] alpha Weight of the matrix product
- * @param[in] beta Weight of matrix C
- * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
- * if the reshape of matrix B should happen only for the first run. GEMMInfo also contains information about the reshaping
- * in case matrix A and matrix B have been already transformed.
- */
- void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info = GEMMInfo());
- /** Initialise the kernel's inputs and output
- *
- * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C].
- *
- * @note All tensors must have the same data type.
- *
- * @note Whilst the first input tensor can be a vector, the second input tensor must be at least a matrix
- *
* @param[in] compile_context The compile context to be used.
* @param[in] a First input tensor (Matrix or Vector A). Data types supported: F16/F32
* @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a.
@@ -168,20 +85,20 @@ public:
* @param[in] alpha Weight of the matrix product
* @param[in] beta Weight of matrix C
* @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
- * if the reshape of matrix B should happen only for the first run. GEMMInfo also contains information about the reshaping
- * in case matrix A and matrix B have been already transformed.
+ * if the reshape of matrix B should happen only for the first run. GEMMInfo also contains information about the reshaping
+ * in case matrix A and matrix B have been already transformed.
*/
void configure(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info = GEMMInfo());
+
+ /** Initialise the kernel's inputs and output
+ *
+ * Similar to @ref CLGEMM::configure()
+ */
+ void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info = GEMMInfo());
+
/** Static function to check if given info will lead to a valid configuration of @ref CLGEMM.
*
- * @param[in] a First input tensor info (Matrix or Vector A). Data types supported: F16/F32
- * @param[in] b Second input tensor info (Matrix B). Data type supported: same as @p a.
- * @param[in] c Third input tensor info (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a.
- * @param[in] output Output tensor info. Data type supported: same as @p a
- * @param[in] alpha Weight of the matrix product
- * @param[in] beta Weight of matrix C
- * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
- * if the reshape of matrix B should happen only for the first run
+ * Similar to @ref CLGEMM::configure()
*
* @return a status
*/
@@ -192,34 +109,8 @@ public:
void prepare() override;
private:
- void configure_native_v1(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info);
- void configure_reshaped_v1(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info);
- void configure_reshaped_v2(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info);
- void configure_reshaped_only_rhs(const CLCompileContext &compile_context, const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta,
- const GEMMInfo &gemm_info);
-
- static Status validate_native_v1(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
- static Status validate_reshaped_v1(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
- static Status validate_reshaped(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
- static Status validate_reshaped_only_rhs(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info);
-
- MemoryGroup _memory_group;
- IWeightsManager *_weights_manager;
- std::unique_ptr<CLGEMMMatrixMultiplyKernel> _mm_kernel;
- std::unique_ptr<CLGEMMReshapeLHSMatrixKernel> _reshape_lhs_kernel;
- std::unique_ptr<CLGEMMReshapeRHSMatrixKernel> _reshape_rhs_kernel;
- std::unique_ptr<weights_transformations::CLGEMMReshapeRHSMatrixKernelManaged> _reshape_rhs_kernel_managed;
- std::unique_ptr<CLGEMMMatrixMultiplyReshapedKernel> _mm_reshaped_kernel;
- std::unique_ptr<CLGEMMMatrixMultiplyReshapedOnlyRHSKernel> _mm_reshaped_only_rhs_kernel;
- std::unique_ptr<CLGEMMMatrixMultiplyReshapedOnlyRHSKernel> _mm_reshaped_only_rhs_fallback_kernel;
- CLTensor _tmp_a;
- CLTensor _tmp_b;
- const ICLTensor *_original_b;
- const ICLTensor *_lhs;
- ICLTensor *_dst;
- bool _reshape_b_only_on_first_run;
- bool _is_prepared;
- CLGEMMKernelType _gemm_kernel_type;
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
} // namespace arm_compute
diff --git a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
index e7f4cb9d01..e5de45c34f 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMLowpMatrixMultiplyCore.h
@@ -41,7 +41,13 @@ class CLGEMMLowpOffsetContributionKernel;
class CLGEMMLowpOffsetContributionOutputStageKernel;
class CLGEMMLowpMatrixAReductionKernel;
class CLGEMMLowpMatrixBReductionKernel;
-class CLGEMMReshapeRHSMatrixKernel;
+namespace opencl
+{
+namespace kernels
+{
+class ClGemmReshapeRhsMatrixKernel;
+} // namespace kernels
+} // namespace opencl
/** Basic function to execute GEMMLowpMatrixMultiplyCore on OpenCL. */
class CLGEMMLowpMatrixMultiplyCore : public IFunction
@@ -140,7 +146,7 @@ private:
std::unique_ptr<CLDepthConvertLayerKernel> _weights_to_qasymm8;
std::unique_ptr<CLGEMMLowpMatrixMultiplyNativeKernel> _mm_native_kernel;
std::unique_ptr<CLGEMMLowpMatrixMultiplyReshapedOnlyRHSKernel> _mm_reshaped_only_rhs_kernel;
- std::unique_ptr<CLGEMMReshapeRHSMatrixKernel> _mtx_b_reshape_kernel;
+ std::unique_ptr<opencl::kernels::ClGemmReshapeRhsMatrixKernel> _mtx_b_reshape_kernel;
std::unique_ptr<CLGEMMLowpMatrixAReductionKernel> _mtx_a_reduction_kernel;
std::unique_ptr<CLGEMMLowpMatrixBReductionKernel> _mtx_b_reduction_kernel;
std::unique_ptr<CLGEMMLowpOffsetContributionKernel> _offset_contribution_kernel;
diff --git a/arm_compute/runtime/CL/functions/CLLogicalAnd.h b/arm_compute/runtime/CL/functions/CLLogicalAnd.h
index 61a15816eb..e3061e1dc3 100644
--- a/arm_compute/runtime/CL/functions/CLLogicalAnd.h
+++ b/arm_compute/runtime/CL/functions/CLLogicalAnd.h
@@ -34,37 +34,6 @@ class CLCompileContext;
class ICLTensor;
class ITensorInfo;
-namespace experimental
-{
-class CLLogicalAnd : public ICLOperator
-{
-public:
- /** Default Constructor */
- CLLogicalAnd() = default;
- /** Initialise the kernel's inputs, output and conversion policy.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in, out] input1 First tensor input. Data types supported: U8.
- * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[in, out] input2 Second tensor input. Data types supported: same as @p input1.
- * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[out] output Output tensor. Data types supported: same as @p input1.
- */
- void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref arm_compute::opencl::kernels::ClLogicalBinaryKernel
- *
- * @param[in] input1 First tensor input info. Data types supported: U8.
- * @param[in] input2 Second tensor input info. Data types supported: same as @p input1.
- * @param[in] output Output tensor info. Data types supported: same as @p input1.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
- // Inherited methods overridden:
- void run(ITensorPack &tensors) override;
-};
-} // namespace experimental
-
/** Basic function to run @ref arm_compute::opencl::kernels::ClLogicalBinaryKernel.
*
* @note The tensor data type for the inputs must be U8.
@@ -125,5 +94,36 @@ private:
struct Impl;
std::unique_ptr<Impl> _impl;
};
+
+namespace experimental
+{
+class CLLogicalAnd : public ICLOperator
+{
+public:
+ /** Default Constructor */
+ CLLogicalAnd() = default;
+ /** Initialise the kernel's inputs, output and conversion policy.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in, out] input1 First tensor input. Data types supported: U8.
+ * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+ * @param[in, out] input2 Second tensor input. Data types supported: same as @p input1.
+ * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+ * @param[out] output Output tensor. Data types supported: same as @p input1.
+ */
+ void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref arm_compute::opencl::kernels::ClLogicalBinaryKernel
+ *
+ * @param[in] input1 First tensor input info. Data types supported: U8.
+ * @param[in] input2 Second tensor input info. Data types supported: same as @p input1.
+ * @param[in] output Output tensor info. Data types supported: same as @p input1.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+ // Inherited methods overridden:
+ void run(ITensorPack &tensors) override;
+};
+} // namespace experimental
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLLOGICALAND_H */
diff --git a/arm_compute/runtime/CL/functions/CLLogicalOr.h b/arm_compute/runtime/CL/functions/CLLogicalOr.h
index b9ffb4a449..893c22f721 100644
--- a/arm_compute/runtime/CL/functions/CLLogicalOr.h
+++ b/arm_compute/runtime/CL/functions/CLLogicalOr.h
@@ -34,37 +34,6 @@ class CLCompileContext;
class ICLTensor;
class ITensorInfo;
-namespace experimental
-{
-class CLLogicalOr : public ICLOperator
-{
-public:
- /** Default Constructor */
- CLLogicalOr() = default;
- /** Initialise the kernel's inputs, output and conversion policy.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in, out] input1 First tensor input. Data types supported: U8.
- * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[in, out] input2 Second tensor input. Data types supported: same as @p input1.
- * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
- * @param[out] output Output tensor. Data types supported: same as @p input1.
- */
- void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output);
- /** Static function to check if given info will lead to a valid configuration of @ref arm_compute::opencl::kernels::ClLogicalBinaryKernel
- *
- * @param[in] input1 First tensor input info. Data types supported: U8.
- * @param[in] input2 Second tensor input info. Data types supported: same as @p input1.
- * @param[in] output Output tensor info. Data types supported: same as @p input1.
- *
- * @return a status
- */
- static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
- // Inherited methods overridden:
- void run(ITensorPack &tensors) override;
-};
-} // namespace experimental
-
/** Basic function to run @ref arm_compute::opencl::kernels::ClLogicalBinaryKernel.
*
* @note The tensor data type for the inputs must be U8.
@@ -125,5 +94,36 @@ private:
struct Impl;
std::unique_ptr<Impl> _impl;
};
+
+namespace experimental
+{
+class CLLogicalOr : public ICLOperator
+{
+public:
+ /** Default Constructor */
+ CLLogicalOr() = default;
+ /** Initialise the kernel's inputs, output and conversion policy.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in, out] input1 First tensor input. Data types supported: U8.
+ * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+ * @param[in, out] input2 Second tensor input. Data types supported: same as @p input1.
+ * The input tensor is [in, out] because its TensorInfo might be modified inside the kernel in case of broadcasting of dimension 0.
+ * @param[out] output Output tensor. Data types supported: same as @p input1.
+ */
+ void configure(const CLCompileContext &compile_context, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output);
+ /** Static function to check if given info will lead to a valid configuration of @ref arm_compute::opencl::kernels::ClLogicalBinaryKernel
+ *
+ * @param[in] input1 First tensor input info. Data types supported: U8.
+ * @param[in] input2 Second tensor input info. Data types supported: same as @p input1.
+ * @param[in] output Output tensor info. Data types supported: same as @p input1.
+ *
+ * @return a status
+ */
+ static Status validate(const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output);
+ // Inherited methods overridden:
+ void run(ITensorPack &tensors) override;
+};
+} // namespace experimental
} // namespace arm_compute
#endif /* ARM_COMPUTE_CLLOGICALOR_H */
diff --git a/arm_compute/runtime/CL/functions/CLSlice.h b/arm_compute/runtime/CL/functions/CLSlice.h
index 7a7689c528..297bcd86fe 100644
--- a/arm_compute/runtime/CL/functions/CLSlice.h
+++ b/arm_compute/runtime/CL/functions/CLSlice.h
@@ -34,45 +34,6 @@ class ICLTensor;
class CLCompileContext;
class ITensorInfo;
-namespace experimental
-{
-/** Basic function to perform tensor slicing */
-class CLSlice : public ICLOperator
-{
-public:
- /** Configure kernel
- *
- * @note Supported tensor rank: up to 4
- * @note Start indices must be non-negative. 0 <= starts[i]
- * @note End coordinates can be negative, which represents the number of elements before the end of that dimension.
- * @note End indices are not inclusive unless negative.
- *
- * @param[in] compile_context The compile context to be used.
- * @param[in] input Source tensor info. Data type supported: All.
- * @param[out] output Destination tensor info. Data type supported: Same as @p input
- * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
- * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
- */
- void configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output, const Coordinates &starts, const Coordinates &ends);
-
- /** Static function to check if given info will lead to a valid configuration of @ref CLSlice
- *
- * @note Supported tensor rank: up to 4
- * @note Start indices must be non-negative. 0 <= starts[i]
- * @note End coordinates can be negative, which represents the number of elements before the end of that dimension.
- * @note End indices are not inclusive unless negative.
- *
- * @param[in] input Source tensor info. Data type supported: All
- * @param[in] output Destination tensor info. Data type supported: Same as @p input
- * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
- * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
- *
- * @return A status
- */
- static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends);
-};
-} // namespace experimental
-
/** Basic function to perform tensor slicing */
class CLSlice : public IFunction
{
@@ -148,5 +109,44 @@ private:
struct Impl;
std::unique_ptr<Impl> _impl;
};
+
+namespace experimental
+{
+/** Basic function to perform tensor slicing */
+class CLSlice : public ICLOperator
+{
+public:
+ /** Configure kernel
+ *
+ * @note Supported tensor rank: up to 4
+ * @note Start indices must be non-negative. 0 <= starts[i]
+ * @note End coordinates can be negative, which represents the number of elements before the end of that dimension.
+ * @note End indices are not inclusive unless negative.
+ *
+ * @param[in] compile_context The compile context to be used.
+ * @param[in] input Source tensor info. Data type supported: All.
+ * @param[out] output Destination tensor info. Data type supported: Same as @p input
+ * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+ * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+ */
+ void configure(const CLCompileContext &compile_context, const ITensorInfo *input, ITensorInfo *output, const Coordinates &starts, const Coordinates &ends);
+
+ /** Static function to check if given info will lead to a valid configuration of @ref CLSlice
+ *
+ * @note Supported tensor rank: up to 4
+ * @note Start indices must be non-negative. 0 <= starts[i]
+ * @note End coordinates can be negative, which represents the number of elements before the end of that dimension.
+ * @note End indices are not inclusive unless negative.
+ *
+ * @param[in] input Source tensor info. Data type supported: All
+ * @param[in] output Destination tensor info. Data type supported: Same as @p input
+ * @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+ * @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
+ *
+ * @return A status
+ */
+ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends);
+};
+} // namespace experimental
} // namespace arm_compute
#endif /* ARM_COMPUTE_CL_SLICE_H */
diff --git a/arm_compute/runtime/ITensorAllocator.h b/arm_compute/runtime/ITensorAllocator.h
index e80f7c4fb9..17e581b40e 100644
--- a/arm_compute/runtime/ITensorAllocator.h
+++ b/arm_compute/runtime/ITensorAllocator.h
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2021 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -36,7 +36,7 @@ class ITensorAllocator
{
public:
/** Default constructor. */
- ITensorAllocator();
+ ITensorAllocator() = default;
/** Allow instances of this class to be copy constructed */
ITensorAllocator(const ITensorAllocator &) = default;
/** Allow instances of this class to be copied */
@@ -54,6 +54,14 @@ public:
* @param[in] alignment Alignment in bytes that the underlying base pointer should comply with.
*/
void init(const TensorInfo &input, size_t alignment = 0);
 + /** Initialize a tensor based on a reference TensorInfo
+ *
 + * @note ITensorAllocator won't own the TensorInfo, thus the TensorInfo needs to outlive the allocator
+ *
+ * @param[in] input TensorInfo object containing the description of the tensor to initialize.
+ * @param[in] alignment Alignment in bytes that the underlying base pointer should comply with.
+ */
+ void soft_init(TensorInfo &input, size_t alignment = 0);
/** Return a reference to the tensor's metadata
*
* @return Reference to the tensor's metadata.
@@ -93,8 +101,9 @@ protected:
virtual void unlock() = 0;
private:
- TensorInfo _info; /**< Tensor's metadata. */
- size_t _alignment; /**< Tensor's alignment in bytes */
+ TensorInfo _info_owned{}; /**< Tensor's metadata. */
+ TensorInfo *_info_external{ nullptr }; /**< External Tensor's metadata */
+ size_t _alignment{}; /**< Tensor's alignment in bytes */
};
} // namespace arm_compute
#endif /*ARM_COMPUTE_ITENSORALLOCATOR_H */
diff --git a/arm_compute/runtime/NEON/functions/NESlice.h b/arm_compute/runtime/NEON/functions/NESlice.h
index 214ffa512c..550bfd2188 100644
--- a/arm_compute/runtime/NEON/functions/NESlice.h
+++ b/arm_compute/runtime/NEON/functions/NESlice.h
@@ -32,25 +32,44 @@ namespace arm_compute
// Forward Declarations
class ITensor;
-namespace experimental
-{
/** Basic function to perform tensor slicing */
-class NESlice : public INEOperator
+class NESlice : public IFunction
{
public:
+ /** Default Constructor */
+ NESlice();
+ /** Default Destructor */
+ ~NESlice();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NESlice(const NESlice &) = delete;
+ /** Default move constructor */
+ NESlice(NESlice &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NESlice &operator=(const NESlice &) = delete;
+ /** Default move assignment operator */
+ NESlice &operator=(NESlice &&);
+
/** Configure kernel
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |All |All |
+ *
* @note Supported tensor rank: up to 4
* @note Start indices must be non-negative. 0 <= starts[i]
* @note End coordinates can be negative, which represents the number of elements before the end of that dimension.
* @note End indices are not inclusive unless negative.
*
- * @param[in] input Source tensor info. Data type supported: All
- * @param[out] output Destination tensor info. Data type supported: Same as @p input
+ * @param[in] input Source tensor. Data type supported: All
+ * @param[out] output Destination tensor. Data type supported: Same as @p input
* @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
* @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
*/
- void configure(const ITensorInfo *input, ITensorInfo *output, const Coordinates &starts, const Coordinates &ends);
+ void configure(const ITensor *input, ITensor *output, const Coordinates &starts, const Coordinates &ends);
/** Static function to check if given info will lead to a valid configuration of @ref NESlice
*
@@ -67,26 +86,21 @@ public:
* @return A status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
-} // namespace experimental
+namespace experimental
+{
/** Basic function to perform tensor slicing */
-class NESlice : public IFunction
+class NESlice : public INEOperator
{
public:
- /** Default Constructor */
- NESlice();
- /** Default Destructor */
- ~NESlice();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESlice(const NESlice &) = delete;
- /** Default move constructor */
- NESlice(NESlice &&);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NESlice &operator=(const NESlice &) = delete;
- /** Default move assignment operator */
- NESlice &operator=(NESlice &&);
-
/** Configure kernel
*
* Valid data layouts:
@@ -102,12 +116,12 @@ public:
* @note End coordinates can be negative, which represents the number of elements before the end of that dimension.
* @note End indices are not inclusive unless negative.
*
- * @param[in] input Source tensor. Data type supported: All
- * @param[out] output Destination tensor. Data type supported: Same as @p input
+ * @param[in] input Source tensor info. Data type supported: All
+ * @param[out] output Destination tensor info. Data type supported: Same as @p input
* @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
* @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
*/
- void configure(const ITensor *input, ITensor *output, const Coordinates &starts, const Coordinates &ends);
+ void configure(const ITensorInfo *input, ITensorInfo *output, const Coordinates &starts, const Coordinates &ends);
/** Static function to check if given info will lead to a valid configuration of @ref NESlice
*
@@ -124,13 +138,7 @@ public:
* @return A status
*/
static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Coordinates &starts, const Coordinates &ends);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- struct Impl;
- std::unique_ptr<Impl> _impl;
};
+} // namespace experimental
} // namespace arm_compute
#endif /* ARM_COMPUTE_NE_SLICE_H */
diff --git a/arm_compute/runtime/NEON/functions/NEStridedSlice.h b/arm_compute/runtime/NEON/functions/NEStridedSlice.h
index 7ba6a52a58..0b4c2a63a1 100644
--- a/arm_compute/runtime/NEON/functions/NEStridedSlice.h
+++ b/arm_compute/runtime/NEON/functions/NEStridedSlice.h
@@ -32,18 +32,37 @@ namespace arm_compute
// Forward Declarations
class ITensor;
-namespace experimental
-{
/** Basic function to run @ref NEStridedSliceKernel */
-class NEStridedSlice : public INEOperator
+class NEStridedSlice : public IFunction
{
public:
+ /** Default Constructor */
+ NEStridedSlice();
+ /** Default Destructor */
+ ~NEStridedSlice();
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEStridedSlice(const NEStridedSlice &) = delete;
+ /** Default move constructor */
+ NEStridedSlice(NEStridedSlice &&);
+ /** Prevent instances of this class from being copied (As this class contains pointers) */
+ NEStridedSlice &operator=(const NEStridedSlice &) = delete;
+ /** Default move assignment operator */
+ NEStridedSlice &operator=(NEStridedSlice &&);
+
/** Configure kernel
*
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:------|:------|
+ * |All |All |
+ *
* @note Supported tensor rank: up to 4
*
- * @param[in] input Source tensor info. Data type supported: All
- * @param[out] output Destination tensor info. Data type supported: Same as @p input
+ * @param[in] input Source tensor. Data type supported: All
+ * @param[out] output Destination tensor. Data type supported: Same as @p input
* @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
* @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
* @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input).
@@ -52,7 +71,7 @@ public:
* @param[in] shrink_axis_mask (Optional) If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
* A slice of size 1 starting from starts[i] in the dimension must be preserved.
*/
- void configure(const ITensorInfo *input, ITensorInfo *output,
+ void configure(const ITensor *input, ITensor *output,
const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0);
@@ -73,26 +92,21 @@ public:
static Status validate(const ITensorInfo *input, const ITensorInfo *output,
const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0);
+
+ // Inherited methods overridden:
+ void run() override;
+
+private:
+ struct Impl;
+ std::unique_ptr<Impl> _impl;
};
-} // namespace experimental
+namespace experimental
+{
/** Basic function to run @ref NEStridedSliceKernel */
-class NEStridedSlice : public IFunction
+class NEStridedSlice : public INEOperator
{
public:
- /** Default Constructor */
- NEStridedSlice();
- /** Default Destructor */
- ~NEStridedSlice();
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEStridedSlice(const NEStridedSlice &) = delete;
- /** Default move constructor */
- NEStridedSlice(NEStridedSlice &&);
- /** Prevent instances of this class from being copied (As this class contains pointers) */
- NEStridedSlice &operator=(const NEStridedSlice &) = delete;
- /** Default move assignment operator */
- NEStridedSlice &operator=(NEStridedSlice &&);
-
/** Configure kernel
*
* Valid data layouts:
@@ -105,8 +119,8 @@ public:
*
* @note Supported tensor rank: up to 4
*
- * @param[in] input Source tensor. Data type supported: All
- * @param[out] output Destination tensor. Data type supported: Same as @p input
+ * @param[in] input Source tensor info. Data type supported: All
+ * @param[out] output Destination tensor info. Data type supported: Same as @p input
* @param[in] starts The starts of the dimensions of the input tensor to be sliced. The length must be of rank(input).
* @param[in] ends The ends of the dimensions of the input tensor to be sliced. The length must be of rank(input).
* @param[in] strides The strides of the dimensions of the input tensor to be sliced. The length must be of rank(input).
@@ -115,7 +129,7 @@ public:
* @param[in] shrink_axis_mask (Optional) If the ith bit of shrink_axis_mask is set, it implies that the ith specification shrinks the dimensionality by 1.
* A slice of size 1 starting from starts[i] in the dimension must be preserved.
*/
- void configure(const ITensor *input, ITensor *output,
+ void configure(const ITensorInfo *input, ITensorInfo *output,
const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0);
@@ -136,13 +150,7 @@ public:
static Status validate(const ITensorInfo *input, const ITensorInfo *output,
const Coordinates &starts, const Coordinates &ends, const BiStrides &strides,
int32_t begin_mask = 0, int32_t end_mask = 0, int32_t shrink_axis_mask = 0);
-
- // Inherited methods overridden:
- void run() override;
-
-private:
- struct Impl;
- std::unique_ptr<Impl> _impl;
};
+} // namespace experimental
} // namespace arm_compute
#endif /* ARM_COMPUTE_NE_STRIDED_SLICE_H */