Diffstat (limited to 'arm_compute/Acl.hpp')
-rw-r--r--  arm_compute/Acl.hpp  461
1 file changed, 438 insertions(+), 23 deletions(-)
diff --git a/arm_compute/Acl.hpp b/arm_compute/Acl.hpp
index b74e65430c..6a9d585c14 100644
--- a/arm_compute/Acl.hpp
+++ b/arm_compute/Acl.hpp
@@ -29,6 +29,7 @@
#include <cstdlib>
#include <memory>
#include <string>
+#include <vector>
#if defined(ARM_COMPUTE_EXCEPTIONS_ENABLED)
#include <exception>
@@ -41,6 +42,9 @@ namespace acl
{
// Forward declarations
class Context;
+class Queue;
+class Tensor;
+class TensorPack;
/**< Status code enum */
enum class StatusCode
@@ -71,7 +75,7 @@ struct ObjectDeleter
#define OBJECT_DELETER(obj, func) \
template <> \
struct ObjectDeleter<obj> \
- \
+ \
{ \
static inline AclStatus Destroy(obj v) \
{ \
@@ -80,6 +84,10 @@ struct ObjectDeleter
};
OBJECT_DELETER(AclContext, AclDestroyContext)
+OBJECT_DELETER(AclQueue, AclDestroyQueue)
+OBJECT_DELETER(AclTensor, AclDestroyTensor)
+OBJECT_DELETER(AclTensorPack, AclDestroyTensorPack)
+OBJECT_DELETER(AclOperator, AclDestroyOperator)
#undef OBJECT_DELETER
@@ -163,7 +171,7 @@ protected:
ObjectBase() = default;
protected:
- std::shared_ptr<T> _object{ nullptr }; /**< Library object */
+ std::shared_ptr<T> _object{nullptr}; /**< Library object */
};
/** Equality operator for library object
@@ -213,8 +221,7 @@ public:
* @param[in] status Status returned
* @param[in] msg Error message to be bound with the exception
*/
- Status(StatusCode status, const std::string &msg)
- : _status(status), _msg(msg)
+ Status(StatusCode status, const std::string &msg) : _status(status), _msg(msg)
{
}
/** Returns an explanatory exception message
@@ -256,13 +263,12 @@ private:
*
* @return Status code
*/
-static inline StatusCode report_status(StatusCode status, const std::string &msg)
+static inline void report_status(StatusCode status, const std::string &msg)
{
- if(status != StatusCode::Success)
+ if (status != StatusCode::Success)
{
throw Status(status, msg);
}
- return status;
}
#else /* defined(ARM_COMPUTE_EXCEPTIONS_ENABLED) */
/** Reports a status code
@@ -275,10 +281,10 @@ static inline StatusCode report_status(StatusCode status, const std::string &msg
*
* @return Status code
*/
-static inline StatusCode report_status(StatusCode status, const std::string &msg)
+static inline void report_status(StatusCode status, const std::string &msg)
{
+ ARM_COMPUTE_IGNORE_UNUSED(status);
ARM_COMPUTE_IGNORE_UNUSED(msg);
- return status;
}
#endif /* defined(ARM_COMPUTE_EXCEPTIONS_ENABLED) */
@@ -292,7 +298,8 @@ enum class Target
/**< Available execution modes */
enum class ExecutionMode
{
- FastRerun = AclPreferFastRerun, /**< Prefer minimum latency in consecutive runs, might introduce higher startup times */
+ FastRerun =
+ AclPreferFastRerun, /**< Prefer minimum latency in consecutive runs, might introduce higher startup times */
FastStart = AclPreferFastStart, /**< Prefer minimizing startup time */
};
@@ -313,12 +320,22 @@ public:
/**< Context options */
struct Options
{
+ static constexpr int32_t num_threads_auto = -1; /**< Allow runtime to specify number of threads */
+
/** Default Constructor
*
* @note By default no precision loss is enabled for operators
* @note By default the preferred execution mode is to favor multiple consecutive reruns of an operator
*/
- Options() = default;
+ Options()
+ : Options(ExecutionMode::FastRerun /* mode */,
+ AclCpuCapabilitiesAuto /* caps */,
+ false /* enable_fast_math */,
+ nullptr /* kernel_config */,
+ num_threads_auto /* max_compute_units */,
+ nullptr /* allocator */)
+ {
+ }
/** Constructor
*
* @param[in] mode Execution mode to be used
@@ -335,14 +352,15 @@ public:
int32_t max_compute_units,
AclAllocator *allocator)
{
- opts.mode = detail::as_cenum<AclExecutionMode>(mode);
- opts.capabilities = caps;
- opts.enable_fast_math = enable_fast_math;
- opts.kernel_config_file = kernel_config;
- opts.max_compute_units = max_compute_units;
- opts.allocator = allocator;
+ copts.mode = detail::as_cenum<AclExecutionMode>(mode);
+ copts.capabilities = caps;
+ copts.enable_fast_math = enable_fast_math;
+ copts.kernel_config_file = kernel_config;
+ copts.max_compute_units = max_compute_units;
+ copts.allocator = allocator;
}
- AclContextOptions opts{ acl_default_ctx_options };
+
+ AclContextOptions copts{};
};
public:
@@ -354,8 +372,7 @@ public:
* @param[in] target Target to create context for
* @param[out] status Status information if requested
*/
- explicit Context(Target target, StatusCode *status = nullptr)
- : Context(target, Options(), status)
+ explicit Context(Target target, StatusCode *status = nullptr) : Context(target, Options(), status)
{
}
/** Constructor
@@ -367,10 +384,408 @@ public:
Context(Target target, const Options &options, StatusCode *status = nullptr)
{
AclContext ctx;
- const auto st = detail::as_enum<StatusCode>(AclCreateContext(&ctx, detail::as_cenum<AclTarget>(target), &options.opts));
+ const auto st =
+ detail::as_enum<StatusCode>(AclCreateContext(&ctx, detail::as_cenum<AclTarget>(target), &options.copts));
reset(ctx);
- report_status(st, "Failure during context creation");
- if(status)
+ report_status(st, "[Compute Library] Failed to create context");
+ if (status)
+ {
+ *status = st;
+ }
+ }
+};
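
    As an editorial aside, a minimal usage sketch of the Options/Context API added above. It is a sketch only: it assumes the enclosing namespace is acl (as in this header) and that Target::Cpu is one of the values of the Target enum, which is not shown in this hunk.

    #include "arm_compute/Acl.hpp"

    int main()
    {
        // Default options: FastRerun execution mode, auto-detected capabilities,
        // fast math disabled, runtime-chosen number of threads (num_threads_auto).
        acl::Context::Options opts;

        acl::StatusCode status = acl::StatusCode::Success;
        acl::Context ctx(acl::Target::Cpu /* assumed enum value */, opts, &status);

        // With ARM_COMPUTE_EXCEPTIONS_ENABLED a failure throws acl::Status instead
        // of only reporting through the status pointer.
        return status == acl::StatusCode::Success ? 0 : 1;
    }
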
+
+/**< Available tuning modes */
+enum class TuningMode
+{
+ Rapid = AclRapid,
+ Normal = AclNormal,
+ Exhaustive = AclExhaustive
+};
+
+/** Queue class
+ *
+ * Queue is responsible for the execution-related aspects, with its main responsibilities being
+ * the scheduling and tuning of operators.
+ *
+ * Multiple queues can be created from the same context, and the same operator can be scheduled on each concurrently.
+ *
+ * @note An operator might depend on the maximum number of compute units provided by the context,
+ * thus scheduling it on a queue with a greater number of scheduling units may lead to errors.
+ */
+class Queue : public detail::ObjectBase<AclQueue_>
+{
+public:
+ /**< Queue options */
+ struct Options
+ {
+ /** Default Constructor
+ *
+ * By default no tuning is performed, and the number of scheduling units
+ * depends on the internal device-discovery functionality.
+ */
+ Options() : opts{AclTuningModeNone, 0} {};
+ /** Constructor
+ *
+ * @param[in] mode Tuning mode to be used
+ * @param[in] compute_units Number of scheduling units to be used
+ */
+ Options(TuningMode mode, int32_t compute_units) : opts{detail::as_cenum<AclTuningMode>(mode), compute_units}
+ {
+ }
+
+ AclQueueOptions opts;
+ };
+
+public:
+ /** Constructor
+ *
+ * @note Serves as a simpler delegate constructor
+ * @note As queue options, default conservative options will be used
+ *
+ * @param[in] ctx Context to create queue for
+ * @param[out] status Status information if requested
+ */
+ explicit Queue(Context &ctx, StatusCode *status = nullptr) : Queue(ctx, Options(), status)
+ {
+ }
+ /** Constructor
+ *
+ * @param[in] ctx     Context from which the queue will be created
+ * @param[in] options Queue options to be used
+ * @param[out] status Status information if requested
+ */
+ explicit Queue(Context &ctx, const Options &options, StatusCode *status = nullptr)
+ {
+ AclQueue queue;
+ const auto st = detail::as_enum<StatusCode>(AclCreateQueue(&queue, ctx.get(), &options.opts));
+ reset(queue);
+ report_status(st, "[Compute Library] Failed to create queue!");
+ if (status)
+ {
+ *status = st;
+ }
+ }
+ /** Block until all the tasks of the queue have been marked as finished
+ *
+ * @return Status code
+ */
+ StatusCode finish()
+ {
+ return detail::as_enum<StatusCode>(AclQueueFinish(_object.get()));
+ }
+};
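
    A sketch of how the Queue above might be used; ctx is assumed to be a valid acl::Context, and the tuning mode and unit count are arbitrary choices for illustration.

    // Tune with the normal strategy on 4 scheduling units (arbitrary values).
    acl::Queue::Options qopts(acl::TuningMode::Normal, 4);

    acl::StatusCode status = acl::StatusCode::Success;
    acl::Queue queue(ctx, qopts, &status);

    // ... schedule operators on the queue (see Operator::run further down) ...

    // Block until all work scheduled on this queue has finished.
    queue.finish();
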
+
+/**< Data type enumeration */
+enum class DataType
+{
+ Unknown = AclDataTypeUnknown,
+ UInt8 = AclUInt8,
+ Int8 = AclInt8,
+ UInt16 = AclUInt16,
+ Int16 = AclInt16,
+ UInt32 = AclUint32,
+ Int32 = AclInt32,
+ Float16 = AclFloat16,
+ BFloat16 = AclBFloat16,
+ Float32 = AclFloat32,
+};
+
+/** Tensor Descriptor class
+ *
+ * Structure that contains all the required meta-data to represent a tensor
+ */
+class TensorDescriptor
+{
+public:
+ /** Constructor
+ *
+ * @param[in] shape Shape of the tensor
+ * @param[in] data_type Data type of the tensor
+ */
+ TensorDescriptor(const std::vector<int32_t> &shape, DataType data_type) : _shape(shape), _data_type(data_type)
+ {
+ _cdesc.ndims = _shape.size();
+ _cdesc.shape = _shape.data();
+ _cdesc.data_type = detail::as_cenum<AclDataType>(_data_type);
+ _cdesc.strides = nullptr;
+ _cdesc.boffset = 0;
+ }
+ /** Constructor
+ *
+ * @param[in] desc C-type descriptor
+ */
+ explicit TensorDescriptor(const AclTensorDescriptor &desc)
+ {
+ _cdesc = desc;
+ _data_type = detail::as_enum<DataType>(desc.data_type);
+ _shape.reserve(desc.ndims);
+ for (int32_t d = 0; d < desc.ndims; ++d)
+ {
+ _shape.emplace_back(desc.shape[d]);
+ }
+ }
+ /** Get underlying C tensor descriptor
+ *
+ * @return Underlying structure
+ */
+ const AclTensorDescriptor *get() const
+ {
+ return &_cdesc;
+ }
+ /** Operator to compare two TensorDescriptor
+ *
+ * @param[in] other The instance to compare against
+ *
+ * @return True if two instances have the same shape and data type
+ */
+ bool operator==(const TensorDescriptor &other)
+ {
+ bool is_same = true;
+
+ is_same &= _data_type == other._data_type;
+ is_same &= _shape.size() == other._shape.size();
+
+ if (is_same)
+ {
+ for (uint32_t d = 0; d < _shape.size(); ++d)
+ {
+ is_same &= _shape[d] == other._shape[d];
+ }
+ }
+
+ return is_same;
+ }
+
+private:
+ std::vector<int32_t> _shape{};
+ DataType _data_type{};
+ AclTensorDescriptor _cdesc{};
+};
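
    A brief illustration of the descriptor class above; the shape is arbitrary.

    // 32x32 single-precision tensor; strides are left to the library (nullptr in the C struct).
    acl::TensorDescriptor desc({32, 32}, acl::DataType::Float32);

    // Two descriptors compare equal when both shape and data type match.
    acl::TensorDescriptor other({32, 32}, acl::DataType::Float32);
    const bool same = (desc == other); // true
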
+
+/** Import memory types */
+enum class ImportType
+{
+ Host = AclImportMemoryType::AclHostPtr
+};
+
+/** Tensor class
+ *
+ * Tensor is a mathematical construct that can represent an N-dimensional space.
+ *
+ * @note Maximum dimensionality support is 6 internally at the moment
+ */
+class Tensor : public detail::ObjectBase<AclTensor_>
+{
+public:
+ /** Constructor
+ *
+ * @note Tensor memory is allocated
+ *
+ * @param[in] ctx Context from which the tensor will be created
+ * @param[in] desc Tensor descriptor to be used
+ * @param[out] status Status information if requested
+ */
+ Tensor(Context &ctx, const TensorDescriptor &desc, StatusCode *status = nullptr) : Tensor(ctx, desc, true, status)
+ {
+ }
+ /** Constructor
+ *
+ * @param[in] ctx Context from which the tensor will be created
+ * @param[in] desc Tensor descriptor to be used
+ * @param[in] allocate Flag to indicate if the tensor needs to be allocated
+ * @param[out] status Status information if requested
+ */
+ Tensor(Context &ctx, const TensorDescriptor &desc, bool allocate, StatusCode *status)
+ {
+ AclTensor tensor;
+ const auto st = detail::as_enum<StatusCode>(AclCreateTensor(&tensor, ctx.get(), desc.get(), allocate));
+ reset(tensor);
+ report_status(st, "[Compute Library] Failed to create tensor!");
+ if (status)
+ {
+ *status = st;
+ }
+ }
+ /** Maps the backing memory of the tensor so that its contents can be accessed by the host
+ *
+ * @return A valid non-zero pointer in case of success else nullptr
+ */
+ void *map()
+ {
+ void *handle = nullptr;
+ const auto st = detail::as_enum<StatusCode>(AclMapTensor(_object.get(), &handle));
+ report_status(st, "[Compute Library] Failed to map the tensor and extract the tensor's backing memory!");
+ return handle;
+ }
+ /** Unmaps tensor's memory
+ *
+ * @param[in] handle Handle to unmap
+ *
+ * @return Status code
+ */
+ StatusCode unmap(void *handle)
+ {
+ const auto st = detail::as_enum<StatusCode>(AclUnmapTensor(_object.get(), handle));
+ report_status(st, "[Compute Library] Failed to unmap the tensor!");
+ return st;
+ }
+ /** Import external memory to a given tensor object
+ *
+ * @param[in] handle External memory handle
+ * @param[in] type Type of memory to be imported
+ *
+ * @return Status code
+ */
+ StatusCode import(void *handle, ImportType type)
+ {
+ const auto st = detail::as_enum<StatusCode>(
+ AclTensorImport(_object.get(), handle, detail::as_cenum<AclImportMemoryType>(type)));
+ report_status(st, "[Compute Library] Failed to import external memory to tensor!");
+ return st;
+ }
+ /** Get the size of the tensor in bytes
+ *
+ * @note The size isn't based on allocated memory, but based on information in its descriptor (dimensions, data type, etc.).
+ *
+ * @return The size of the tensor in bytes
+ */
+ uint64_t get_size()
+ {
+ uint64_t size{0};
+ const auto st = detail::as_enum<StatusCode>(AclGetTensorSize(_object.get(), &size));
+ report_status(st, "[Compute Library] Failed to get the size of the tensor");
+ return size;
+ }
+ /** Get the descriptor of this tensor
+ *
+ * @return The descriptor describing the characteristics of this tensor
+ */
+ TensorDescriptor get_descriptor()
+ {
+ AclTensorDescriptor desc;
+ const auto st = detail::as_enum<StatusCode>(AclGetTensorDescriptor(_object.get(), &desc));
+ report_status(st, "[Compute Library] Failed to get the descriptor of the tensor");
+ return TensorDescriptor(desc);
+ }
+};
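
    A sketch of allocating a tensor and filling it from the host through the map()/unmap() interface shown above; ctx is assumed to be a valid context and the element type is assumed to match the float writes.

    acl::TensorDescriptor desc({8, 8}, acl::DataType::Float32);
    acl::Tensor tensor(ctx, desc); // backing memory is allocated by default

    // Map the backing memory, write to it on the host, then unmap it again.
    if (auto *data = static_cast<float *>(tensor.map()))
    {
        const uint64_t elements = tensor.get_size() / sizeof(float);
        for (uint64_t i = 0; i < elements; ++i)
        {
            data[i] = 0.f;
        }
        tensor.unmap(data);
    }
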
+
+/** Tensor pack class
+ *
+ * Pack is a utility construct that is used to create a collection of tensors that can then
+ * be passed to an operator as inputs.
+ */
+class TensorPack : public detail::ObjectBase<AclTensorPack_>
+{
+public:
+ /** Pack pair construct */
+ struct PackPair
+ {
+ /** Constructor
+ *
+ * @param[in] tensor_ Tensor to pack
+ * @param[in] slot_id_ Slot identification of the tensor with respect to the operator
+ */
+ PackPair(Tensor *tensor_, int32_t slot_id_) : tensor(tensor_), slot_id(slot_id_)
+ {
+ }
+
+ Tensor *tensor{nullptr}; /**< Tensor object */
+ int32_t slot_id{AclSlotUnknown}; /**< Slot id with respect to the operator */
+ };
+
+public:
+ /** Constructor
+ *
+ * @param[in] ctx Context from which the tensor pack will be created
+ * @param[out] status Status information if requested
+ */
+ explicit TensorPack(Context &ctx, StatusCode *status = nullptr)
+ {
+ AclTensorPack pack;
+ const auto st = detail::as_enum<StatusCode>(AclCreateTensorPack(&pack, ctx.get()));
+ reset(pack);
+ report_status(st, "[Compute Library] Failure during tensor pack creation");
+ if (status)
+ {
+ *status = st;
+ }
+ }
+ /** Add tensor to tensor pack
+ *
+ * @param[in] tensor  Tensor to be added to the pack
+ * @param[in] slot_id Slot id of the tensor with respect to the operator
+ *
+ * @return Status code
+ */
+ StatusCode add(Tensor &tensor, int32_t slot_id)
+ {
+ return detail::as_enum<StatusCode>(AclPackTensor(_object.get(), tensor.get(), slot_id));
+ }
+ /** Add a list of tensors to a tensor pack
+ *
+ * @param[in] packed Pack pairs to be added
+ *
+ * @return Status code
+ */
+ StatusCode add(std::initializer_list<PackPair> packed)
+ {
+ const size_t size = packed.size();
+ std::vector<int32_t> slots(size);
+ std::vector<AclTensor> tensors(size);
+ int i = 0;
+ for (auto &p : packed)
+ {
+ slots[i] = p.slot_id;
+ tensors[i] = p.tensor->get();
+ ++i;
+ }
+ return detail::as_enum<StatusCode>(AclPackTensors(_object.get(), tensors.data(), slots.data(), size));
+ }
+};
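
    A sketch of building a pack from two tensors; src and dst are assumed to be acl::Tensor objects, and the slot ids below are placeholders, real code should use whatever slot identifiers the target operator documents (the C API slot constants are not shown in this diff).

    acl::TensorPack pack(ctx);

    // Placeholder slot ids; substitute the identifiers the operator expects.
    const int32_t slot_src = 0;
    const int32_t slot_dst = 1;

    pack.add(src, slot_src);      // one tensor at a time
    pack.add({{&dst, slot_dst}}); // or as a list of PackPair entries
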
+
+/** Operator class
+ *
+ * Operators are the basic algorithmic blocks responsible for performing distinct operations
+ */
+class Operator : public detail::ObjectBase<AclOperator_>
+{
+public:
+ /** Run an operator on a given input list
+ *
+ * @param[in,out] queue Queue to schedule the operator on
+ * @param[in,out] pack  Tensor pack holding the input and output tensors
+ *
+ * @return Status code
+ */
+ StatusCode run(Queue &queue, TensorPack &pack)
+ {
+ return detail::as_enum<StatusCode>(AclRunOperator(_object.get(), queue.get(), pack.get()));
+ }
+
+protected:
+ /** Constructor */
+ Operator() = default;
+};
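
    Pulling the pieces together, a sketch of how running a concrete operator such as the Activation wrapper declared below is expected to look. ctx, queue, pack, src_desc and dst_desc are assumed to exist as in the earlier sketches, and act_desc is an AclActivationDescriptor whose fields are not shown in this diff.

    acl::StatusCode status = acl::StatusCode::Success;
    acl::Activation act(ctx, src_desc, dst_desc, act_desc, &status);

    if (status == acl::StatusCode::Success)
    {
        // Schedule the operator on the queue with the packed tensors, then wait.
        act.run(queue, pack);
        queue.finish();
    }
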
+
+/// Operators
+using ActivationDesc = AclActivationDescriptor;
+class Activation : public Operator
+{
+public:
+ Activation(Context &ctx,
+ const TensorDescriptor &src,
+ const TensorDescriptor &dst,
+ const ActivationDesc &desc,
+ StatusCode *status = nullptr)
+ {
+ AclOperator op;
+ const auto st = detail::as_enum<StatusCode>(AclActivation(&op, ctx.get(), src.get(), dst.get(), desc));
+ reset(op);
+ report_status(st, "[Compute Library] Failure during Activation operator creation");
+ if (status)
{
*status = st;
}