From 3f26ef4f9a2d447adb324dd69aec7c49cf7905fc Mon Sep 17 00:00:00 2001
From: Georgios Pinitas <georgios.pinitas@arm.com>
Date: Tue, 23 Feb 2021 10:01:33 +0000
Subject: Add tensor related data structures for the new API

Adds the following:
 - TensorDescriptor: which is responsible for holding the information
 needed to represent a tensor (e.g. shape, dimensions, etc)
 - Tensor: an aggreate object of a descriptor and a backing memory
 - TensorPack: A map of tensor that can be passed to operators as
 inputs/outputs

Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I02734ac6ad85700d91d6e73217b4637adbf5d177
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5260
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
---
 arm_compute/Acl.hpp                     | 252 ++++++++++++++++++++++++++++++--
 arm_compute/AclEntrypoints.h            | 127 ++++++++++++++++
 arm_compute/AclOpenClExt.h              |   9 ++
 arm_compute/AclTypes.h                  |  61 ++++++--
 arm_compute/core/utils/logging/Macros.h |  15 +-
 5 files changed, 440 insertions(+), 24 deletions(-)

(limited to 'arm_compute')
diff --git a/arm_compute/Acl.hpp b/arm_compute/Acl.hpp
index b74e65430c..a009894438 100644
--- a/arm_compute/Acl.hpp
+++ b/arm_compute/Acl.hpp
@@ -29,6 +29,7 @@
 #include <cstdlib>
 #include <memory>
 #include <string>
+#include <vector>
 
 #if defined(ARM_COMPUTE_EXCEPTIONS_ENABLED)
 #include <exception>
@@ -41,6 +42,8 @@ namespace acl
 {
 // Forward declarations
 class Context;
+class Tensor;
+class TensorPack;
 
 /**< Status code enum */
 enum class StatusCode
@@ -80,6 +83,8 @@ struct ObjectDeleter
     };
 
 OBJECT_DELETER(AclContext, AclDestroyContext)
+OBJECT_DELETER(AclTensor, AclDestroyTensor)
+OBJECT_DELETER(AclTensorPack, AclDestroyTensorPack)
 
 #undef OBJECT_DELETER
 
@@ -256,13 +261,12 @@ private:
  *
  * @return Status code
  */
-static inline StatusCode report_status(StatusCode status, const std::string &msg)
+static inline void report_status(StatusCode status, const std::string &msg)
 {
     if(status != StatusCode::Success)
     {
         throw Status(status, msg);
     }
-    return status;
 }
 #else  /* defined(ARM_COMPUTE_EXCEPTIONS_ENABLED) */
 /** Reports a status code
@@ -275,10 +279,10 @@ static inline StatusCode report_status(StatusCode status, const std::string &msg
  *
  * @return Status code
  */
-static inline StatusCode report_status(StatusCode status, const std::string &msg)
+static inline void report_status(StatusCode status, const std::string &msg)
 {
+    ARM_COMPUTE_IGNORE_UNUSED(status);
     ARM_COMPUTE_IGNORE_UNUSED(msg);
-    return status;
 }
 #endif /* defined(ARM_COMPUTE_EXCEPTIONS_ENABLED) */
 
@@ -313,12 +317,22 @@ public:
     /**< Context options */
     struct Options
     {
+        static constexpr int32_t num_threads_auto = -1; /**< Allow runtime to specify number of threads */
+
         /** Default Constructor
          *
          * @note By default no precision loss is enabled for operators
          * @note By default the preferred execution mode is to favor multiple consecutive reruns of an operator
          */
-        Options() = default;
+        Options()
+            : Options(ExecutionMode::FastRerun /* mode */,
+                      AclCpuCapabilitiesAuto /* caps */,
+                      false /* enable_fast_math */,
+                      nullptr /* kernel_config */,
+                      num_threads_auto /* max_compute_units */,
+                      nullptr /* allocator */)
+        {
+        }
         /** Constructor
          *
          * @param[in] mode              Execution mode to be used
@@ -335,14 +349,15 @@ public:
                 int32_t               max_compute_units,
                 AclAllocator         *allocator)
         {
-            opts.mode               = detail::as_cenum<AclExecutionMode>(mode);
-            opts.capabilities       = caps;
-            opts.enable_fast_math   = enable_fast_math;
-            opts.kernel_config_file = kernel_config;
-            opts.max_compute_units  = max_compute_units;
-            opts.allocator          = allocator;
+            copts.mode               = detail::as_cenum<AclExecutionMode>(mode);
+            copts.capabilities       = caps;
+            copts.enable_fast_math   = enable_fast_math;
+            copts.kernel_config_file = kernel_config;
+            copts.max_compute_units  = max_compute_units;
+            copts.allocator          = allocator;
         }
-        AclContextOptions opts{ acl_default_ctx_options };
+
+        AclContextOptions copts{};
     };
 
 public:
@@ -367,14 +382,223 @@ public:
     Context(Target target, const Options &options, StatusCode *status = nullptr)
     {
         AclContext ctx;
-        const auto st = detail::as_enum<StatusCode>(AclCreateContext(&ctx, detail::as_cenum<AclTarget>(target), &options.opts));
+        const auto st = detail::as_enum<StatusCode>(AclCreateContext(&ctx, detail::as_cenum<AclTarget>(target), &options.copts));
         reset(ctx);
-        report_status(st, "Failure during context creation");
+        report_status(st, "[Arm Compute Library] Failed to create context");
+        if(status)
+        {
+            *status = st;
+        }
+    }
+};
+
+/**< Data type enumeration */
+enum class DataType
+{
+    Unknown  = AclDataTypeUnknown,
+    UInt8    = AclUInt8,
+    Int8     = AclInt8,
+    UInt16   = AclUInt16,
+    Int16    = AclInt16,
+    UInt32   = AclUint32,
+    Int32    = AclInt32,
+    Float16  = AclFloat16,
+    BFloat16 = AclBFloat16,
+    Float32  = AclFloat32,
+};
+
+/** Tensor Descriptor class
+ *
+ * Structure that contains all the required meta-data to represent a tensor
+ */
+class TensorDescriptor
+{
+public:
+    /** Constructor
+     *
+     * @param[in] shape Shape of the tensor
+     * @param[in] data_type Data type of the tensor
+     */
+    TensorDescriptor(const std::vector<int32_t> &shape, DataType data_type)
+        : _shape(shape), _data_type(data_type)
+    {
+        _cdesc.ndims     = _shape.size();
+        _cdesc.shape     = _shape.data();
+        _cdesc.data_type = detail::as_cenum<AclDataType>(_data_type);
+        _cdesc.strides   = nullptr;
+        _cdesc.boffset   = 0;
+    }
+    /** Get underlying C tensor descriptor
+     *
+     * @return Underlying structure
+     */
+    const AclTensorDescriptor *get() const
+    {
+        return &_cdesc;
+    }
+
+private:
+    std::vector<int32_t> _shape{};
+    DataType             _data_type{};
+    AclTensorDescriptor  _cdesc{};
+};
+
+/** Import memory types */
+enum class ImportType
+{
+    Host = AclImportMemoryType::AclHostPtr
+};
+
+/** Tensor class
+ *
+ * Tensor is an mathematical construct that can represent an N-Dimensional space.
+ *
+ * @note Maximum dimensionality support is 6 internally at the moment
+ */
+class Tensor : public detail::ObjectBase<AclTensor_>
+{
+public:
+    /** Constructor
+     *
+     * @note Tensor memory is allocated
+     *
+     * @param[in]  ctx    Context from where the tensor will be created from
+     * @param[in]  desc   Tensor descriptor to be used
+     * @param[out] status Status information if requested
+     */
+    Tensor(Context &ctx, const TensorDescriptor &desc, StatusCode *status = nullptr)
+        : Tensor(ctx, desc, true, status)
+    {
+    }
+    /** Constructor
+     *
+     * @param[in]  ctx    Context from where the tensor will be created from
+     * @param[in]  desc   Tensor descriptor to be used
+     * @param[in]  allocate Flag to indicate if the tensor needs to be allocated
+     * @param[out] status Status information if requested
+     */
+    Tensor(Context &ctx, const TensorDescriptor &desc, bool allocate, StatusCode *status)
+    {
+        AclTensor  tensor;
+        const auto st = detail::as_enum<StatusCode>(AclCreateTensor(&tensor, ctx.get(), desc.get(), allocate));
+        reset(tensor);
+        report_status(st, "[Arm Compute Library] Failed to create tensor!");
         if(status)
         {
             *status = st;
         }
     }
+    /** Maps the backing memory of a given tensor that can be used by the host to access any contents
+     *
+     * @return A valid non-zero pointer in case of success else nullptr
+     */
+    void *map()
+    {
+        void      *handle = nullptr;
+        const auto st     = detail::as_enum<StatusCode>(AclMapTensor(_object.get(), &handle));
+        report_status(st, "[Arm Compute Library] Failed to map the tensor and extract the tensor's backing memory!");
+        return handle;
+    }
+    /** Unmaps tensor's memory
+     *
+     * @param[in] handle Handle to unmap
+     *
+     * @return Status code
+     */
+    StatusCode unmap(void *handle)
+    {
+        const auto st = detail::as_enum<StatusCode>(AclUnmapTensor(_object.get(), handle));
+        report_status(st, "[Arm Compute Library] Failed to unmap the tensor!");
+        return st;
+    }
+    /** Import external memory to a given tensor object
+     *
+     * @param[in] handle External memory handle
+     * @param[in] type   Type of memory to be imported
+     *
+     * @return Status code
+     */
+    StatusCode import(void *handle, ImportType type)
+    {
+        const auto st = detail::as_enum<StatusCode>(AclTensorImport(_object.get(), handle, detail::as_cenum<AclImportMemoryType>(type)));
+        report_status(st, "[Arm Compute Library] Failed to import external memory to tensor!");
+        return st;
+    }
+};
+
+/** Tensor pack class
+ *
+ * Pack is a utility construct that is used to create a collection of tensors that can then
+ * be passed into operator as inputs.
+ */
+class TensorPack : public detail::ObjectBase<AclTensorPack_>
+{
+public:
+    /** Pack pair construct */
+    struct PackPair
+    {
+        /** Constructor
+         *
+         * @param[in] tensor_ Tensor to pack
+         * @param[in] slot_id_ Slot identification of the tensor in respect with the operator
+         */
+        PackPair(Tensor *tensor_, int32_t slot_id_)
+            : tensor(tensor_), slot_id(slot_id_)
+        {
+        }
+
+        Tensor *tensor{ nullptr };         /**< Tensor object */
+        int32_t slot_id{ AclSlotUnknown }; /**< Slot id in respect with the operator */
+    };
+
+public:
+    /** Constructor
+     *
+     * @param[in]  ctx    Context from where the tensor pack will be created from
+     * @param[out] status Status information if requested
+     */
+    explicit TensorPack(Context &ctx, StatusCode *status = nullptr)
+    {
+        AclTensorPack pack;
+        const auto    st = detail::as_enum<StatusCode>(AclCreateTensorPack(&pack, ctx.get()));
+        reset(pack);
+        report_status(st, "[Arm Compute Library] Failure during tensor pack creation");
+        if(status)
+        {
+            *status = st;
+        }
+    }
+    /** Add tensor to tensor pack
+     *
+     * @param[in] slot_id Slot id of the tensor in respect with the operator
+     * @param[in] tensor  Tensor to be added in the pack
+     *
+     * @return Status code
+     */
+    StatusCode add(Tensor &tensor, int32_t slot_id)
+    {
+        return detail::as_enum<StatusCode>(AclPackTensor(_object.get(), tensor.get(), slot_id));
+    }
+    /** Add a list of tensors to a tensor pack
+     *
+     * @param[in] packed Pair packs to be added
+     *
+     * @return Status code
+     */
+    StatusCode add(std::initializer_list<PackPair> packed)
+    {
+        const size_t           size = packed.size();
+        std::vector<int32_t>   slots(size);
+        std::vector<AclTensor> tensors(size);
+        int                    i = 0;
+        for(auto &p : packed)
+        {
+            slots[i]   = p.slot_id;
+            tensors[i] = AclTensor(p.tensor);
+            ++i;
+        }
+        return detail::as_enum<StatusCode>(AclPackTensors(_object.get(), tensors.data(), slots.data(), size));
+    }
 };
 } // namespace acl
 #undef ARM_COMPUTE_IGNORE_UNUSED
diff --git a/arm_compute/AclEntrypoints.h b/arm_compute/AclEntrypoints.h
index 02e072f826..cd974341c2 100644
--- a/arm_compute/AclEntrypoints.h
+++ b/arm_compute/AclEntrypoints.h
@@ -62,6 +62,133 @@ AclStatus AclCreateContext(AclContext              *ctx,
  */
 AclStatus AclDestroyContext(AclContext ctx);
 
+/** Create a Tensor object
+ *
+ * Tensor is a generalized matrix construct that can represent up to ND dimensionality (where N = 6 for Compute Library)
+ * The object holds a backing memory along-side to operate on
+ *
+ * @param[in, out] tensor   A valid non-zero tensor object if no failures occur
+ * @param[in]      ctx      Context to be used
+ * @param[in]      desc     Tensor representation meta-data
+ * @param[in]      allocate Instructs allocation of the tensor objects
+ *
+ * Returns:
+ *  - @ref AclSuccess if function was completed successfully
+ *  - @ref AclOutOfMemory if there was a failure allocating memory resources
+ *  - @ref AclUnsupportedTarget if the requested target is unsupported
+ *  - @ref AclInvalidArgument if a given argument is invalid
+ */
+AclStatus AclCreateTensor(AclTensor *tensor, AclContext ctx, const AclTensorDescriptor *desc, bool allocate);
+
+/** Map a tensor's backing memory to the host
+ *
+ * @param[in]      tensor Tensor to be mapped
+ * @param[in, out] handle A handle to the underlying backing memory
+ *
+ * @return Status code
+ *
+ * Returns:
+ *  - @ref AclSuccess if function was completed successfully
+ *  - @ref AclInvalidArgument if a given argument is invalid
+ */
+AclStatus AclMapTensor(AclTensor tensor, void **handle);
+
+/** Unmap the tensor's backing memory
+ *
+ * @param[in] tensor tensor to unmap memory from
+ * @param[in] handle Backing memory to be unmapped
+ *
+ * @return Status code
+ *
+ * Returns:
+ *  - @ref AclSuccess if function was completed successfully
+ *  - @ref AclInvalidArgument if a given argument is invalid
+ */
+AclStatus AclUnmapTensor(AclTensor tensor, void *handle);
+
+/** Import external memory to a given tensor object
+ *
+ * @param[in, out] tensor Tensor to import memory to
+ * @param[in]      handle Backing memory to be imported
+ * @param[in]      type   Type of the imported memory
+ *
+ * Returns:
+ *  - @ref AclSuccess if function was completed successfully
+ *  - @ref AclInvalidArgument if a given argument is invalid
+ */
+AclStatus AclTensorImport(AclTensor tensor, void *handle, AclImportMemoryType type);
+
+/** Destroy a given tensor object
+ *
+ * @param[in,out] tensor A valid tensor object to be destroyed
+ *
+ * @return Status code
+ *
+ * Returns:
+ *  - @ref AclSuccess if function was completed successfully
+ *  - @ref AclInvalidArgument if the provided tensor is invalid
+ */
+AclStatus AclDestroyTensor(AclTensor tensor);
+
+/** Creates a tensor pack
+ *
+ * Tensor packs are used to create a collection of tensors that can be passed around for operator execution
+ *
+ * @param[in,out] pack A valid non-zero tensor pack object if no failures occur
+ * @param[in]     ctx  Context to be used
+ *
+ * @return Status code
+ *
+ * Returns:
+ *  - @ref AclSuccess if function was completed successfully
+ *  - @ref AclOutOfMemory if there was a failure allocating memory resources
+ *  - @ref AclInvalidArgument if a given argument is invalid
+ */
+AclStatus AclCreateTensorPack(AclTensorPack *pack, AclContext ctx);
+
+/** Add a tensor to a tensor pack
+ *
+ * @param[in,out] pack    Pack to append a tensor to
+ * @param[in]     tensor  Tensor to pack
+ * @param[in]     slot_id Slot of the operator that the tensors corresponds to
+ *
+ * @return Status code
+ *
+ * Returns:
+ *  - @ref AclSuccess if function was completed successfully
+ *  - @ref AclOutOfMemory if there was a failure allocating memory resources
+ *  - @ref AclInvalidArgument if a given argument is invalid
+ */
+AclStatus AclPackTensor(AclTensorPack pack, AclTensor tensor, int32_t slot_id);
+
+/** A list of tensors to a tensor pack
+ *
+ * @param[in,out] pack        Pack to append the tensors to
+ * @param[in]     tensors     Tensors to append to the pack
+ * @param[in]     slot_ids    Slot IDs of each tensors to the operators
+ * @param[in]     num_tensors Number of tensors that are passed
+ *
+ * @return Status code
+ *
+ * Returns:
+ *  - @ref AclSuccess if function was completed successfully
+ *  - @ref AclOutOfMemory if there was a failure allocating memory resources
+ *  - @ref AclInvalidArgument if a given argument is invalid
+ */
+AclStatus AclPackTensors(AclTensorPack pack, AclTensor *tensors, int32_t *slot_ids, size_t num_tensors);
+
+/** Destroy a given tensor pack object
+ *
+ * @param[in,out] pack A valid tensor pack object to destroy
+ *
+ * @return Status code
+ *
+ * Returns:
+ *  - @ref AclSuccess if functions was completed successfully
+ *  - @ref AclInvalidArgument if the provided context is invalid
+ */
+AclStatus AclDestroyTensorPack(AclTensorPack pack);
+
 #ifdef __cplusplus
 }
 #endif /* __cplusplus */
diff --git a/arm_compute/AclOpenClExt.h b/arm_compute/AclOpenClExt.h
index f71cd37299..15b233ca12 100644
--- a/arm_compute/AclOpenClExt.h
+++ b/arm_compute/AclOpenClExt.h
@@ -63,6 +63,15 @@ AclStatus AclGetClContext(AclContext ctx, cl_context *opencl_context);
  */
 AclStatus AclSetClContext(AclContext ctx, cl_context opencl_context);
 
+/** Extract the underlying OpenCL memory object by a given Compute Library tensor object
+ *
+ * @param[in]  tensor     A valid non-zero tensor
+ * @param[out] opencl_mem Underlyig OpenCL memory object
+ *
+ * @return Status code
+ */
+AclStatus AclGetClMem(AclTensor tensor, cl_mem *opencl_mem);
+
 #ifdef __cplusplus
 }
 #endif /* __cplusplus */
diff --git a/arm_compute/AclTypes.h b/arm_compute/AclTypes.h
index bee6d1a8d7..69717ec8a8 100644
--- a/arm_compute/AclTypes.h
+++ b/arm_compute/AclTypes.h
@@ -33,6 +33,10 @@ extern "C" {
 
 /**< Opaque Context object */
 typedef struct AclContext_ *AclContext;
+/**< Opaque Tensor object */
+typedef struct AclTensor_ *AclTensor;
+/**< Opaque Tensor pack object */
+typedef struct AclTensorPack_ *AclTensorPack;
 
 // Capabilities bitfield (Note: if multiple are enabled ComputeLibrary will pick the best possible)
 typedef uint64_t AclTargetCapabilities;
@@ -134,16 +138,55 @@ typedef struct AclContextOptions
     AclAllocator         *allocator;          /**< Allocator to be used by all the memory internally */
 } AclContextOptions;
 
-/** Default context */
-const AclContextOptions acl_default_ctx_options =
+/**< Supported data types */
+typedef enum AclDataType
 {
-    AclPreferFastRerun,     /* mode */
-    AclCpuCapabilitiesAuto, /* capabilities */
-    false,                  /* enable_fast_math */
-    "default.mlgo",         /* kernel_config_file */
-    -1,                     /* max_compute_units */
-    nullptr                 /* allocator */
-};
+    AclDataTypeUnknown = 0, /**< Unknown data type */
+    AclUInt8           = 1, /**< 8-bit unsigned integer */
+    AclInt8            = 2, /**< 8-bit signed integer */
+    AclUInt16          = 3, /**< 16-bit unsigned integer */
+    AclInt16           = 4, /**< 16-bit signed integer */
+    AclUint32          = 5, /**< 32-bit unsigned integer */
+    AclInt32           = 6, /**< 32-bit signed integer */
+    AclFloat16         = 7, /**< 16-bit floating point */
+    AclBFloat16        = 8, /**< 16-bit brain floating point */
+    AclFloat32         = 9, /**< 32-bit floating point */
+} AclDataType;
+
+/**< Supported data layouts for operations */
+typedef enum AclDataLayout
+{
+    AclDataLayoutUnknown = 0, /**< Unknown data layout */
+    AclNhwc              = 1, /**< Native, performant, Compute Library data layout */
+    AclNchw              = 2, /**< Data layout where width is the fastest changing dimension */
+} AclDataLayout;
+
+/** Type of memory to be imported */
+typedef enum AclImportMemoryType
+{
+    AclHostPtr = 0 /**< Host allocated memory */
+} AclImportMemoryType;
+
+/**< Tensor Descriptor */
+typedef struct AclTensorDescriptor
+{
+    int32_t     ndims;     /**< Number or dimensions */
+    int32_t    *shape;     /**< Tensor Shape */
+    AclDataType data_type; /**< Tensor Data type */
+    int64_t    *strides;   /**< Strides on each dimension. Linear memory is assumed if nullptr */
+    int64_t     boffset;   /**< Offset in terms of bytes for the first element */
+} AclTensorDescriptor;
+
+/**< Slot type of a tensor */
+typedef enum
+{
+    AclSlotUnknown = -1,
+    AclSrc         = 0,
+    AclSrc0        = 0,
+    AclSrc1        = 1,
+    AclDst         = 30,
+    AclSrcVec      = 256,
+} AclTensorSlot;
 
 #ifdef __cplusplus
 }
diff --git a/arm_compute/core/utils/logging/Macros.h b/arm_compute/core/utils/logging/Macros.h
index 21ed721eb1..1108dd3800 100644
--- a/arm_compute/core/utils/logging/Macros.h
+++ b/arm_compute/core/utils/logging/Macros.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017-2020 Arm Limited.
+ * Copyright (c) 2017-2021 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -40,6 +40,18 @@
         }                                                                                \
     } while(false)
 
+#define ARM_COMPUTE_LOG_MSG_WITH_FUNCNAME(logger_name, log_level, msg)                   \
+    do                                                                                   \
+    {                                                                                    \
+        auto __logger = arm_compute::logging::LoggerRegistry::get().logger(logger_name); \
+        if(__logger != nullptr)                                                          \
+        {                                                                                \
+            std::ostringstream s;                                                        \
+            s << __func__ << ":" << msg;                                                 \
+            __logger->log(log_level, s.str());                                           \
+        }                                                                                \
+    } while(false)
+
 #define ARM_COMPUTE_LOG_MSG_WITH_FORMAT(logger_name, log_level, fmt, ...)                     \
     do                                                                                        \
     {                                                                                         \
@@ -68,6 +80,7 @@
 #else /* ARM_COMPUTE_LOGGING_ENABLED */
 
 #define ARM_COMPUTE_LOG_MSG(logger_name, log_level, msg)
+#define ARM_COMPUTE_LOG_MSG_WITH_FUNCNAME(logger_name, log_level, msg)
 #define ARM_COMPUTE_LOG_MSG_WITH_FORMAT(logger_name, log_level, fmt, ...)
 #define ARM_COMPUTE_LOG_STREAM(logger_name, log_level, stream)
 
-- 
cgit v1.2.1