1 files changed, 438 insertions, 23 deletions
diff --git a/arm_compute/Acl.hpp b/arm_compute/Acl.hpp
index b74e65430c..6a9d585c14 100644
--- a/arm_compute/Acl.hpp
+++ b/arm_compute/Acl.hpp
@@ -29,6 +29,7 @@
 #include <cstdlib>
 #include <memory>
 #include <string>
+#include <vector>
 
 #if defined(ARM_COMPUTE_EXCEPTIONS_ENABLED)
 #include <exception>
@@ -41,6 +42,9 @@ namespace acl
 {
 // Forward declarations
 class Context;
+class Queue;
+class Tensor;
+class TensorPack;
 
 /**< Status code enum */
 enum class StatusCode
@@ -71,7 +75,7 @@ struct ObjectDeleter
 #define OBJECT_DELETER(obj, func)              \
     template <>                                \
     struct ObjectDeleter<obj>                  \
-        \
+                                               \
     {                                          \
         static inline AclStatus Destroy(obj v) \
         {                                      \
@@ -80,6 +84,10 @@ struct ObjectDeleter
     };
 
 OBJECT_DELETER(AclContext, AclDestroyContext)
+OBJECT_DELETER(AclQueue, AclDestroyQueue)
+OBJECT_DELETER(AclTensor, AclDestroyTensor)
+OBJECT_DELETER(AclTensorPack, AclDestroyTensorPack)
+OBJECT_DELETER(AclOperator, AclDestroyOperator)
 
 #undef OBJECT_DELETER
 
@@ -163,7 +171,7 @@ protected:
     ObjectBase() = default;
 
 protected:
-    std::shared_ptr<T> _object{ nullptr }; /**< Library object */
+    std::shared_ptr<T> _object{nullptr}; /**< Library object */
 };
 
 /** Equality operator for library object
@@ -213,8 +221,7 @@ public:
      * @param[in] status Status returned
      * @param[in] msg    Error message to be bound with the exception
      */
-    Status(StatusCode status, const std::string &msg)
-        : _status(status), _msg(msg)
+    Status(StatusCode status, const std::string &msg) : _status(status), _msg(msg)
     {
     }
     /** Returns an explanatory exception message
@@ -256,13 +263,12 @@ private:
  *
  * @return Status code
  */
-static inline StatusCode report_status(StatusCode status, const std::string &msg)
+static inline void report_status(StatusCode status, const std::string &msg)
 {
-    if(status != StatusCode::Success)
+    if (status != StatusCode::Success)
     {
         throw Status(status, msg);
     }
-    return status;
 }
 #else  /* defined(ARM_COMPUTE_EXCEPTIONS_ENABLED) */
 /** Reports a status code
@@ -275,10 +281,10 @@ static inline StatusCode report_status(StatusCode status, const std::string &msg
  *
  * @return Status code
  */
-static inline StatusCode report_status(StatusCode status, const std::string &msg)
+static inline void report_status(StatusCode status, const std::string &msg)
 {
+    ARM_COMPUTE_IGNORE_UNUSED(status);
     ARM_COMPUTE_IGNORE_UNUSED(msg);
-    return status;
 }
 #endif /* defined(ARM_COMPUTE_EXCEPTIONS_ENABLED) */
 
@@ -292,7 +298,8 @@ enum class Target
 /**< Available execution modes */
 enum class ExecutionMode
 {
-    FastRerun = AclPreferFastRerun, /**< Prefer minimum latency in consecutive runs, might introduce higher startup times */
+    FastRerun =
+        AclPreferFastRerun, /**< Prefer minimum latency in consecutive runs, might introduce higher startup times */
     FastStart = AclPreferFastStart, /**< Prefer minimizing startup time */
 };
 
@@ -313,12 +320,22 @@ public:
     /**< Context options */
     struct Options
     {
+        static constexpr int32_t num_threads_auto = -1; /**< Allow runtime to specify number of threads */
+
         /** Default Constructor
          *
          * @note By default no precision loss is enabled for operators
          * @note By default the preferred execution mode is to favor multiple consecutive reruns of an operator
          */
-        Options() = default;
+        Options()
+            : Options(ExecutionMode::FastRerun /* mode */,
+                      AclCpuCapabilitiesAuto /* caps */,
+                      false /* enable_fast_math */,
+                      nullptr /* kernel_config */,
+                      num_threads_auto /* max_compute_units */,
+                      nullptr /* allocator */)
+        {
+        }
         /** Constructor
          *
          * @param[in] mode              Execution mode to be used
@@ -335,14 +352,15 @@ public:
                 int32_t               max_compute_units,
                 AclAllocator         *allocator)
         {
-            opts.mode               = detail::as_cenum<AclExecutionMode>(mode);
-            opts.capabilities       = caps;
-            opts.enable_fast_math   = enable_fast_math;
-            opts.kernel_config_file = kernel_config;
-            opts.max_compute_units  = max_compute_units;
-            opts.allocator          = allocator;
+            copts.mode               = detail::as_cenum<AclExecutionMode>(mode);
+            copts.capabilities       = caps;
+            copts.enable_fast_math   = enable_fast_math;
+            copts.kernel_config_file = kernel_config;
+            copts.max_compute_units  = max_compute_units;
+            copts.allocator          = allocator;
         }
-        AclContextOptions opts{ acl_default_ctx_options };
+
+        AclContextOptions copts{};
     };
 
 public:
@@ -354,8 +372,7 @@ public:
      * @param[in]  target Target to create context for
      * @param[out] status Status information if requested
      */
-    explicit Context(Target target, StatusCode *status = nullptr)
-        : Context(target, Options(), status)
+    explicit Context(Target target, StatusCode *status = nullptr) : Context(target, Options(), status)
     {
     }
     /** Constructor
@@ -367,10 +384,408 @@ public:
     Context(Target target, const Options &options, StatusCode *status = nullptr)
     {
         AclContext ctx;
-        const auto st = detail::as_enum<StatusCode>(AclCreateContext(&ctx, detail::as_cenum<AclTarget>(target), &options.opts));
+        const auto st =
+            detail::as_enum<StatusCode>(AclCreateContext(&ctx, detail::as_cenum<AclTarget>(target), &options.copts));
         reset(ctx);
-        report_status(st, "Failure during context creation");
-        if(status)
+        report_status(st, "[Compute Library] Failed to create context");
+        if (status)
+        {
+            *status = st;
+        }
+    }
+};
+
+/** Available tuning modes; each enumerator takes its value from the corresponding C API constant */
+enum class TuningMode
+{
+    Rapid      = AclRapid,     /**< Value of AclRapid */
+    Normal     = AclNormal,    /**< Value of AclNormal */
+    Exhaustive = AclExhaustive /**< Value of AclExhaustive */
+};
+
+/** Queue class
+ *
+ * Queue is responsible for the execution related aspects, with main responsibilities those of
+ * scheduling and tuning operators.
+ *
+ * Multiple queues can be created from the same context, and the same operator can be scheduled on each concurrently.
+ *
+ * @note An operator might depend on the maximum possible compute units that are provided in the context,
+ *       thus in cases where the number of the scheduling units of the queue are greater might lead to errors.
+ */
+class Queue : public detail::ObjectBase<AclQueue_>
+{
+public:
+    /** Queue options */
+    struct Options
+    {
+        /** Default Constructor
+         *
+         * As default options, no tuning will be performed, and the number of scheduling units will
+         * depend on internal device discovery functionality
+         */
+        Options() : opts{AclTuningModeNone, 0} {}
+        /** Constructor
+         *
+         * @param[in] mode          Tuning mode to be used
+         * @param[in] compute_units Number of scheduling units to be used
+         */
+        Options(TuningMode mode, int32_t compute_units) : opts{detail::as_cenum<AclTuningMode>(mode), compute_units}
+        {
+        }
+
+        AclQueueOptions opts; /**< Underlying C options structure handed to AclCreateQueue */
+    };
+
+public:
+    /** Constructor
+     *
+     * @note Serves as a simpler delegate constructor
+     * @note As queue options, default conservative options will be used
+     *
+     * @param[in]  ctx    Context to create queue for
+     * @param[out] status Status information if requested
+     */
+    explicit Queue(Context &ctx, StatusCode *status = nullptr) : Queue(ctx, Options(), status)
+    {
+    }
+    /** Constructor
+     *
+     * @note @p options has no default on purpose: a default here made Queue(ctx) ambiguous with the overload above
+     *
+     * @param[in]  ctx     Context from where the queue will be created from
+     * @param[in]  options Queue options to be used
+     * @param[out] status  Status information if requested
+     */
+    explicit Queue(Context &ctx, const Options &options, StatusCode *status = nullptr)
+    {
+        AclQueue   queue = nullptr; // Never hand an indeterminate handle to reset() if creation fails
+        const auto st    = detail::as_enum<StatusCode>(AclCreateQueue(&queue, ctx.get(), &options.opts));
+        reset(queue);
+        report_status(st, "[Compute Library] Failed to create queue!");
+        if (status)
+        {
+            *status = st;
+        }
+    }
+    /** Block until all the tasks of the queue have been marked as finished
+     *
+     * @return Status code
+     */
+    StatusCode finish()
+    {
+        return detail::as_enum<StatusCode>(AclQueueFinish(_object.get()));
+    }
+};
+
+/** Data type enumeration; each enumerator takes its value from the corresponding C API constant */
+enum class DataType
+{
+    Unknown  = AclDataTypeUnknown, /**< Value of AclDataTypeUnknown */
+    UInt8    = AclUInt8,           /**< Value of AclUInt8 */
+    Int8     = AclInt8,            /**< Value of AclInt8 */
+    UInt16   = AclUInt16,          /**< Value of AclUInt16 */
+    Int16    = AclInt16,           /**< Value of AclInt16 */
+    UInt32   = AclUint32,          /**< Value of AclUint32 (note the C constant's lower-case 'i') */
+    Int32    = AclInt32,           /**< Value of AclInt32 */
+    Float16  = AclFloat16,         /**< Value of AclFloat16 */
+    BFloat16 = AclBFloat16,        /**< Value of AclBFloat16 */
+    Float32  = AclFloat32,         /**< Value of AclFloat32 */
+};
+
+/** Tensor Descriptor class
+ *
+ * Structure that contains all the required meta-data to represent a tensor
+ */
+class TensorDescriptor
+{
+public:
+    /** Constructor
+     *
+     * @param[in] shape     Shape of the tensor
+     * @param[in] data_type Data type of the tensor
+     */
+    TensorDescriptor(const std::vector<int32_t> &shape, DataType data_type) : _shape(shape), _data_type(data_type)
+    {
+        _cdesc.ndims     = static_cast<int32_t>(_shape.size());
+        _cdesc.shape     = _shape.data();
+        _cdesc.data_type = detail::as_cenum<AclDataType>(_data_type);
+        _cdesc.strides   = nullptr;
+        _cdesc.boffset   = 0;
+    }
+    /** Constructor
+     *
+     * @param[in] desc C-type descriptor; shape values are copied, the strides pointer is kept exactly as given
+     */
+    explicit TensorDescriptor(const AclTensorDescriptor &desc)
+    {
+        _cdesc     = desc;
+        _data_type = detail::as_enum<DataType>(desc.data_type);
+        if (desc.shape != nullptr && desc.ndims > 0)
+        {
+            _shape.assign(desc.shape, desc.shape + desc.ndims);
+        }
+        _cdesc.ndims = static_cast<int32_t>(_shape.size());
+        _cdesc.shape = _shape.data(); // Point at the owned copy instead of the caller's storage
+    }
+    /** Copy constructor: deep-copies the shape so the C descriptor never points into @p other */
+    TensorDescriptor(const TensorDescriptor &other) : TensorDescriptor(other._cdesc)
+    {
+    }
+    /** Copy assignment: deep-copies the shape so the C descriptor never points into @p other */
+    TensorDescriptor &operator=(const TensorDescriptor &other)
+    {
+        _shape       = other._shape;
+        _data_type   = other._data_type;
+        _cdesc       = other._cdesc;
+        _cdesc.shape = _shape.data();
+        return *this;
+    }
+    /** Get underlying C tensor descriptor
+     *
+     * @return Underlying structure; its shape pointer refers to storage owned by this object
+     */
+    const AclTensorDescriptor *get() const
+    {
+        return &_cdesc;
+    }
+    /** Operator to compare two TensorDescriptor
+     *
+     * @param[in] other The instance to compare against
+     * @return True if two instances have the same shape and data type
+     */
+    bool operator==(const TensorDescriptor &other) const
+    {
+        return _data_type == other._data_type && _shape == other._shape;
+    }
+
+private:
+    std::vector<int32_t> _shape{};
+    DataType             _data_type{};
+    AclTensorDescriptor  _cdesc{};
+};
+
+/** Import memory types; each enumerator takes its value from the C API AclImportMemoryType enumeration */
+enum class ImportType
+{
+    Host = AclImportMemoryType::AclHostPtr /**< Value of AclImportMemoryType::AclHostPtr */
+};
+
+/** Tensor class
+ *
+ * Tensor is a mathematical construct that can represent an N-Dimensional space.
+ *
+ * @note Maximum dimensionality support is 6 internally at the moment
+ */
+class Tensor : public detail::ObjectBase<AclTensor_>
+{
+public:
+    /** Constructor
+     *
+     * @note Tensor memory is allocated
+     *
+     * @param[in]  ctx    Context from where the tensor will be created from
+     * @param[in]  desc   Tensor descriptor to be used
+     * @param[out] status Status information if requested
+     */
+    Tensor(Context &ctx, const TensorDescriptor &desc, StatusCode *status = nullptr) : Tensor(ctx, desc, true, status)
+    {
+    }
+    /** Constructor
+     *
+     * @param[in]  ctx      Context from where the tensor will be created from
+     * @param[in]  desc     Tensor descriptor to be used
+     * @param[in]  allocate Flag to indicate if the tensor needs to be allocated
+     * @param[out] status   Status information if requested
+     */
+    Tensor(Context &ctx, const TensorDescriptor &desc, bool allocate, StatusCode *status)
+    {
+        AclTensor  tensor = nullptr; // Never hand an indeterminate handle to reset() if creation fails
+        const auto st     = detail::as_enum<StatusCode>(AclCreateTensor(&tensor, ctx.get(), desc.get(), allocate));
+        reset(tensor);
+        report_status(st, "[Compute Library] Failed to create tensor!");
+        if (status)
+        {
+            *status = st;
+        }
+    }
+    /** Maps the backing memory of a given tensor that can be used by the host to access any contents
+     *
+     * @return A valid non-zero pointer in case of success else nullptr
+     */
+    void *map()
+    {
+        void      *handle = nullptr;
+        const auto st     = detail::as_enum<StatusCode>(AclMapTensor(_object.get(), &handle));
+        report_status(st, "[Compute Library] Failed to map the tensor and extract the tensor's backing memory!");
+        return handle;
+    }
+    /** Unmaps tensor's memory
+     *
+     * @param[in] handle Handle to unmap
+     *
+     * @return Status code
+     */
+    StatusCode unmap(void *handle)
+    {
+        const auto st = detail::as_enum<StatusCode>(AclUnmapTensor(_object.get(), handle));
+        report_status(st, "[Compute Library] Failed to unmap the tensor!");
+        return st;
+    }
+    /** Import external memory to a given tensor object
+     *
+     * @param[in] handle External memory handle
+     * @param[in] type   Type of memory to be imported
+     *
+     * @return Status code
+     */
+    StatusCode import(void *handle, ImportType type)
+    {
+        const auto st = detail::as_enum<StatusCode>(
+            AclTensorImport(_object.get(), handle, detail::as_cenum<AclImportMemoryType>(type)));
+        report_status(st, "[Compute Library] Failed to import external memory to tensor!");
+        return st;
+    }
+    /** Get the size of the tensor in bytes
+     *
+     * @note The size isn't based on allocated memory, but based on information in its descriptor (dimensions, data type, etc.).
+     *
+     * @return The size of the tensor in bytes; 0 if the query failed and exceptions are disabled
+     */
+    uint64_t get_size()
+    {
+        uint64_t   size{0};
+        const auto st = detail::as_enum<StatusCode>(AclGetTensorSize(_object.get(), &size));
+        report_status(st, "[Compute Library] Failed to get the size of the tensor");
+        return size;
+    }
+    /** Get the descriptor of this tensor
+     *
+     * @return The descriptor describing the characteristics of this tensor
+     */
+    TensorDescriptor get_descriptor()
+    {
+        AclTensorDescriptor desc{}; // Zeroed so a failed query is never read back as indeterminate data
+        const auto          st = detail::as_enum<StatusCode>(AclGetTensorDescriptor(_object.get(), &desc));
+        report_status(st, "[Compute Library] Failed to get the descriptor of the tensor");
+        return TensorDescriptor(desc);
+    }
+};
+
+/** Tensor pack class
+ *
+ * Pack is a utility construct that is used to create a collection of tensors that can then
+ * be passed into operator as inputs.
+ */
+class TensorPack : public detail::ObjectBase<AclTensorPack_>
+{
+public:
+    /** Pack pair construct */
+    struct PackPair
+    {
+        /** Constructor
+         *
+         * @param[in] tensor_  Tensor to pack
+         * @param[in] slot_id_ Slot identification of the tensor in respect with the operator
+         */
+        PackPair(Tensor *tensor_, int32_t slot_id_) : tensor(tensor_), slot_id(slot_id_)
+        {
+        }
+
+        Tensor *tensor{nullptr};         /**< Tensor object */
+        int32_t slot_id{AclSlotUnknown}; /**< Slot id in respect with the operator */
+    };
+
+public:
+    /** Constructor
+     *
+     * @param[in]  ctx    Context from where the tensor pack will be created from
+     * @param[out] status Status information if requested
+     */
+    explicit TensorPack(Context &ctx, StatusCode *status = nullptr)
+    {
+        AclTensorPack pack = nullptr; // Never hand an indeterminate handle to reset() if creation fails
+        const auto    st   = detail::as_enum<StatusCode>(AclCreateTensorPack(&pack, ctx.get()));
+        reset(pack);
+        report_status(st, "[Compute Library] Failure during tensor pack creation");
+        if (status)
+        {
+            *status = st;
+        }
+    }
+    /** Add tensor to tensor pack
+     *
+     * @param[in] tensor  Tensor to be added in the pack
+     * @param[in] slot_id Slot id of the tensor in respect with the operator
+     *
+     * @return Status code
+     */
+    StatusCode add(Tensor &tensor, int32_t slot_id)
+    {
+        return detail::as_enum<StatusCode>(AclPackTensor(_object.get(), tensor.get(), slot_id));
+    }
+    /** Add a list of tensors to a tensor pack
+     *
+     * @param[in] packed Pair packs to be added; a pair with a null tensor is forwarded as a null handle
+     *
+     * @return Status code
+     */
+    StatusCode add(std::initializer_list<PackPair> packed)
+    {
+        const size_t           size = packed.size();
+        std::vector<int32_t>   slots(size);
+        std::vector<AclTensor> tensors(size);
+        size_t                 i = 0;
+        for (const auto &p : packed)
+        {
+            slots[i]   = p.slot_id;
+            tensors[i] = (p.tensor != nullptr) ? p.tensor->get() : nullptr; // C handle, not the wrapper address
+            ++i;
+        }
+        return detail::as_enum<StatusCode>(AclPackTensors(_object.get(), tensors.data(), slots.data(), size));
+    }
+};
+
+/** Operator class
+ *
+ * Operators are the basic algorithmic blocks responsible for performing distinct operations
+ */
+class Operator : public detail::ObjectBase<AclOperator_>
+{
+public:
+    /** Run an operator on a given input list
+     *
+     * @param[in,out] queue Queue to schedule the operator on
+     * @param[in]     pack  Tensor list to be used as input
+     *
+     * @return Status Code
+     */
+    StatusCode run(Queue &queue, TensorPack &pack)
+    {
+        return detail::as_enum<StatusCode>(AclRunOperator(_object.get(), queue.get(), pack.get()));
+    }
+
+protected:
+    /** Constructor */
+    Operator() = default;
+};
+
+/// Operators
+using ActivationDesc = AclActivationDescriptor; /**< C++ alias of the C API activation descriptor type */
+class Activation : public Operator
+{
+public:
+    Activation(Context                &ctx,
+               const TensorDescriptor &src,
+               const TensorDescriptor &dst,
+               const ActivationDesc   &desc,
+               StatusCode             *status = nullptr)
+    {
+        AclOperator op;
+        const auto  st = detail::as_enum<StatusCode>(AclActivation(&op, ctx.get(), src.get(), dst.get(), desc));
+        reset(op);
+        report_status(st, "[Compute Library] Failure during Activation operator creation");
+        if (status)
         {
             *status = st;
         }