diff options
Diffstat (limited to 'arm_compute/Acl.hpp')
-rw-r--r-- | arm_compute/Acl.hpp | 461 |
1 files changed, 438 insertions, 23 deletions
diff --git a/arm_compute/Acl.hpp b/arm_compute/Acl.hpp index b74e65430c..6a9d585c14 100644 --- a/arm_compute/Acl.hpp +++ b/arm_compute/Acl.hpp @@ -29,6 +29,7 @@ #include <cstdlib> #include <memory> #include <string> +#include <vector> #if defined(ARM_COMPUTE_EXCEPTIONS_ENABLED) #include <exception> @@ -41,6 +42,9 @@ namespace acl { // Forward declarations class Context; +class Queue; +class Tensor; +class TensorPack; /**< Status code enum */ enum class StatusCode @@ -71,7 +75,7 @@ struct ObjectDeleter #define OBJECT_DELETER(obj, func) \ template <> \ struct ObjectDeleter<obj> \ - \ + \ { \ static inline AclStatus Destroy(obj v) \ { \ @@ -80,6 +84,10 @@ struct ObjectDeleter }; OBJECT_DELETER(AclContext, AclDestroyContext) +OBJECT_DELETER(AclQueue, AclDestroyQueue) +OBJECT_DELETER(AclTensor, AclDestroyTensor) +OBJECT_DELETER(AclTensorPack, AclDestroyTensorPack) +OBJECT_DELETER(AclOperator, AclDestroyOperator) #undef OBJECT_DELETER @@ -163,7 +171,7 @@ protected: ObjectBase() = default; protected: - std::shared_ptr<T> _object{ nullptr }; /**< Library object */ + std::shared_ptr<T> _object{nullptr}; /**< Library object */ }; /** Equality operator for library object @@ -213,8 +221,7 @@ public: * @param[in] status Status returned * @param[in] msg Error message to be bound with the exception */ - Status(StatusCode status, const std::string &msg) - : _status(status), _msg(msg) + Status(StatusCode status, const std::string &msg) : _status(status), _msg(msg) { } /** Returns an explanatory exception message @@ -256,13 +263,12 @@ private: * * @return Status code */ -static inline StatusCode report_status(StatusCode status, const std::string &msg) +static inline void report_status(StatusCode status, const std::string &msg) { - if(status != StatusCode::Success) + if (status != StatusCode::Success) { throw Status(status, msg); } - return status; } #else /* defined(ARM_COMPUTE_EXCEPTIONS_ENABLED) */ /** Reports a status code @@ -275,10 +281,10 @@ static inline StatusCode report_status(StatusCode status, const std::string &msg * * @return Status code */ -static inline StatusCode report_status(StatusCode status, const std::string &msg) +static inline void report_status(StatusCode status, const std::string &msg) { + ARM_COMPUTE_IGNORE_UNUSED(status); ARM_COMPUTE_IGNORE_UNUSED(msg); - return status; } #endif /* defined(ARM_COMPUTE_EXCEPTIONS_ENABLED) */ @@ -292,7 +298,8 @@ enum class Target /**< Available execution modes */ enum class ExecutionMode { - FastRerun = AclPreferFastRerun, /**< Prefer minimum latency in consecutive runs, might introduce higher startup times */ + FastRerun = + AclPreferFastRerun, /**< Prefer minimum latency in consecutive runs, might introduce higher startup times */ FastStart = AclPreferFastStart, /**< Prefer minimizing startup time */ }; @@ -313,12 +320,22 @@ public: /**< Context options */ struct Options { + static constexpr int32_t num_threads_auto = -1; /**< Allow runtime to specify number of threads */ + /** Default Constructor * * @note By default no precision loss is enabled for operators * @note By default the preferred execution mode is to favor multiple consecutive reruns of an operator */ - Options() = default; + Options() + : Options(ExecutionMode::FastRerun /* mode */, + AclCpuCapabilitiesAuto /* caps */, + false /* enable_fast_math */, + nullptr /* kernel_config */, + num_threads_auto /* max_compute_units */, + nullptr /* allocator */) + { + } /** Constructor * * @param[in] mode Execution mode to be used @@ -335,14 +352,15 @@ public: int32_t max_compute_units, AclAllocator *allocator) { - opts.mode = detail::as_cenum<AclExecutionMode>(mode); - opts.capabilities = caps; - opts.enable_fast_math = enable_fast_math; - opts.kernel_config_file = kernel_config; - opts.max_compute_units = max_compute_units; - opts.allocator = allocator; + copts.mode = detail::as_cenum<AclExecutionMode>(mode); + copts.capabilities = caps; + copts.enable_fast_math = enable_fast_math; + copts.kernel_config_file = kernel_config; + copts.max_compute_units = max_compute_units; + copts.allocator = allocator; } - AclContextOptions opts{ acl_default_ctx_options }; + + AclContextOptions copts{}; }; public: @@ -354,8 +372,7 @@ public: * @param[in] target Target to create context for * @param[out] status Status information if requested */ - explicit Context(Target target, StatusCode *status = nullptr) - : Context(target, Options(), status) + explicit Context(Target target, StatusCode *status = nullptr) : Context(target, Options(), status) { } /** Constructor @@ -367,10 +384,408 @@ public: Context(Target target, const Options &options, StatusCode *status = nullptr) { AclContext ctx; - const auto st = detail::as_enum<StatusCode>(AclCreateContext(&ctx, detail::as_cenum<AclTarget>(target), &options.opts)); + const auto st = + detail::as_enum<StatusCode>(AclCreateContext(&ctx, detail::as_cenum<AclTarget>(target), &options.copts)); reset(ctx); - report_status(st, "Failure during context creation"); - if(status) + report_status(st, "[Compute Library] Failed to create context"); + if (status) + { + *status = st; + } + } +}; + +/**< Available tuning modes */ +enum class TuningMode +{ + Rapid = AclRapid, + Normal = AclNormal, + Exhaustive = AclExhaustive +}; + +/** Queue class + * + * Queue is responsible for the execution related aspects, with main responsibilities those of + * scheduling and tuning operators. + * + * Multiple queues can be created from the same context, and the same operator can be scheduled on each concurrently. + * + * @note An operator might depend on the maximum possible compute units that are provided in the context, + * thus in cases where the number of the scheduling units of the queue are greater might lead to errors. + */ +class Queue : public detail::ObjectBase<AclQueue_> +{ +public: + /**< Queue options */ + struct Options + { + /** Default Constructor + * + * As default options, no tuning will be performed, and the number of scheduling units will + * depends on internal device discovery functionality + */ + Options() : opts{AclTuningModeNone, 0} {}; + /** Constructor + * + * @param[in] mode Tuning mode to be used + * @param[in] compute_units Number of scheduling units to be used + */ + Options(TuningMode mode, int32_t compute_units) : opts{detail::as_cenum<AclTuningMode>(mode), compute_units} + { + } + + AclQueueOptions opts; + }; + +public: + /** Constructor + * + * @note Serves as a simpler delegate constructor + * @note As queue options, default conservative options will be used + * + * @param[in] ctx Context to create queue for + * @param[out] status Status information if requested + */ + explicit Queue(Context &ctx, StatusCode *status = nullptr) : Queue(ctx, Options(), status) + { + } + /** Constructor + * + * @note As queue options, default conservative options will be used + * + * @param[in] ctx Context from where the queue will be created from + * @param[in] options Queue options to be used + * @param[out] status Status information if requested + */ + explicit Queue(Context &ctx, const Options &options = Options(), StatusCode *status = nullptr) + { + AclQueue queue; + const auto st = detail::as_enum<StatusCode>(AclCreateQueue(&queue, ctx.get(), &options.opts)); + reset(queue); + report_status(st, "[Compute Library] Failed to create queue!"); + if (status) + { + *status = st; + } + } + /** Block until all the tasks of the queue have been marked as finished + * + * @return Status code + */ + StatusCode finish() + { + return detail::as_enum<StatusCode>(AclQueueFinish(_object.get())); + } +}; + +/**< Data type enumeration */ +enum class DataType +{ + Unknown = AclDataTypeUnknown, + UInt8 = AclUInt8, + Int8 = AclInt8, + UInt16 = AclUInt16, + Int16 = AclInt16, + UInt32 = AclUint32, + Int32 = AclInt32, + Float16 = AclFloat16, + BFloat16 = AclBFloat16, + Float32 = AclFloat32, +}; + +/** Tensor Descriptor class + * + * Structure that contains all the required meta-data to represent a tensor + */ +class TensorDescriptor +{ +public: + /** Constructor + * + * @param[in] shape Shape of the tensor + * @param[in] data_type Data type of the tensor + */ + TensorDescriptor(const std::vector<int32_t> &shape, DataType data_type) : _shape(shape), _data_type(data_type) + { + _cdesc.ndims = _shape.size(); + _cdesc.shape = _shape.data(); + _cdesc.data_type = detail::as_cenum<AclDataType>(_data_type); + _cdesc.strides = nullptr; + _cdesc.boffset = 0; + } + /** Constructor + * + * @param[in] desc C-type descriptor + */ + explicit TensorDescriptor(const AclTensorDescriptor &desc) + { + _cdesc = desc; + _data_type = detail::as_enum<DataType>(desc.data_type); + _shape.reserve(desc.ndims); + for (int32_t d = 0; d < desc.ndims; ++d) + { + _shape.emplace_back(desc.shape[d]); + } + } + /** Get underlying C tensor descriptor + * + * @return Underlying structure + */ + const AclTensorDescriptor *get() const + { + return &_cdesc; + } + /** Operator to compare two TensorDescriptor + * + * @param[in] other The instance to compare against + * + * @return True if two instances have the same shape and data type + */ + bool operator==(const TensorDescriptor &other) + { + bool is_same = true; + + is_same &= _data_type == other._data_type; + is_same &= _shape.size() == other._shape.size(); + + if (is_same) + { + for (uint32_t d = 0; d < _shape.size(); ++d) + { + is_same &= _shape[d] == other._shape[d]; + } + } + + return is_same; + } + +private: + std::vector<int32_t> _shape{}; + DataType _data_type{}; + AclTensorDescriptor _cdesc{}; +}; + +/** Import memory types */ +enum class ImportType +{ + Host = AclImportMemoryType::AclHostPtr +}; + +/** Tensor class + * + * Tensor is an mathematical construct that can represent an N-Dimensional space. + * + * @note Maximum dimensionality support is 6 internally at the moment + */ +class Tensor : public detail::ObjectBase<AclTensor_> +{ +public: + /** Constructor + * + * @note Tensor memory is allocated + * + * @param[in] ctx Context from where the tensor will be created from + * @param[in] desc Tensor descriptor to be used + * @param[out] status Status information if requested + */ + Tensor(Context &ctx, const TensorDescriptor &desc, StatusCode *status = nullptr) : Tensor(ctx, desc, true, status) + { + } + /** Constructor + * + * @param[in] ctx Context from where the tensor will be created from + * @param[in] desc Tensor descriptor to be used + * @param[in] allocate Flag to indicate if the tensor needs to be allocated + * @param[out] status Status information if requested + */ + Tensor(Context &ctx, const TensorDescriptor &desc, bool allocate, StatusCode *status) + { + AclTensor tensor; + const auto st = detail::as_enum<StatusCode>(AclCreateTensor(&tensor, ctx.get(), desc.get(), allocate)); + reset(tensor); + report_status(st, "[Compute Library] Failed to create tensor!"); + if (status) + { + *status = st; + } + } + /** Maps the backing memory of a given tensor that can be used by the host to access any contents + * + * @return A valid non-zero pointer in case of success else nullptr + */ + void *map() + { + void *handle = nullptr; + const auto st = detail::as_enum<StatusCode>(AclMapTensor(_object.get(), &handle)); + report_status(st, "[Compute Library] Failed to map the tensor and extract the tensor's backing memory!"); + return handle; + } + /** Unmaps tensor's memory + * + * @param[in] handle Handle to unmap + * + * @return Status code + */ + StatusCode unmap(void *handle) + { + const auto st = detail::as_enum<StatusCode>(AclUnmapTensor(_object.get(), handle)); + report_status(st, "[Compute Library] Failed to unmap the tensor!"); + return st; + } + /** Import external memory to a given tensor object + * + * @param[in] handle External memory handle + * @param[in] type Type of memory to be imported + * + * @return Status code + */ + StatusCode import(void *handle, ImportType type) + { + const auto st = detail::as_enum<StatusCode>( + AclTensorImport(_object.get(), handle, detail::as_cenum<AclImportMemoryType>(type))); + report_status(st, "[Compute Library] Failed to import external memory to tensor!"); + return st; + } + /** Get the size of the tensor in byte + * + * @note The size isn't based on allocated memory, but based on information in its descriptor (dimensions, data type, etc.). + * + * @return The size of the tensor in byte + */ + uint64_t get_size() + { + uint64_t size{0}; + const auto st = detail::as_enum<StatusCode>(AclGetTensorSize(_object.get(), &size)); + report_status(st, "[Compute Library] Failed to get the size of the tensor"); + return size; + } + /** Get the descriptor of this tensor + * + * @return The descriptor describing the characteristics of this tensor + */ + TensorDescriptor get_descriptor() + { + AclTensorDescriptor desc; + const auto st = detail::as_enum<StatusCode>(AclGetTensorDescriptor(_object.get(), &desc)); + report_status(st, "[Compute Library] Failed to get the descriptor of the tensor"); + return TensorDescriptor(desc); + } +}; + +/** Tensor pack class + * + * Pack is a utility construct that is used to create a collection of tensors that can then + * be passed into operator as inputs. + */ +class TensorPack : public detail::ObjectBase<AclTensorPack_> +{ +public: + /** Pack pair construct */ + struct PackPair + { + /** Constructor + * + * @param[in] tensor_ Tensor to pack + * @param[in] slot_id_ Slot identification of the tensor in respect with the operator + */ + PackPair(Tensor *tensor_, int32_t slot_id_) : tensor(tensor_), slot_id(slot_id_) + { + } + + Tensor *tensor{nullptr}; /**< Tensor object */ + int32_t slot_id{AclSlotUnknown}; /**< Slot id in respect with the operator */ + }; + +public: + /** Constructor + * + * @param[in] ctx Context from where the tensor pack will be created from + * @param[out] status Status information if requested + */ + explicit TensorPack(Context &ctx, StatusCode *status = nullptr) + { + AclTensorPack pack; + const auto st = detail::as_enum<StatusCode>(AclCreateTensorPack(&pack, ctx.get())); + reset(pack); + report_status(st, "[Compute Library] Failure during tensor pack creation"); + if (status) + { + *status = st; + } + } + /** Add tensor to tensor pack + * + * @param[in] slot_id Slot id of the tensor in respect with the operator + * @param[in] tensor Tensor to be added in the pack + * + * @return Status code + */ + StatusCode add(Tensor &tensor, int32_t slot_id) + { + return detail::as_enum<StatusCode>(AclPackTensor(_object.get(), tensor.get(), slot_id)); + } + /** Add a list of tensors to a tensor pack + * + * @param[in] packed Pair packs to be added + * + * @return Status code + */ + StatusCode add(std::initializer_list<PackPair> packed) + { + const size_t size = packed.size(); + std::vector<int32_t> slots(size); + std::vector<AclTensor> tensors(size); + int i = 0; + for (auto &p : packed) + { + slots[i] = p.slot_id; + tensors[i] = AclTensor(p.tensor); + ++i; + } + return detail::as_enum<StatusCode>(AclPackTensors(_object.get(), tensors.data(), slots.data(), size)); + } +}; + +/** Operator class + * + * Operators are the basic algorithmic blocks responsible for performing distinct operations + */ +class Operator : public detail::ObjectBase<AclOperator_> +{ +public: + /** Run an operator on a given input list + * + * @param[in,out] queue Queue to scheduler the operator on + * @param pack Tensor list to be used as input + * + * @return Status Code + */ + StatusCode run(Queue &queue, TensorPack &pack) + { + return detail::as_cenum<StatusCode>(AclRunOperator(_object.get(), queue.get(), pack.get())); + } + +protected: + /** Constructor */ + Operator() = default; +}; + +/// Operators +using ActivationDesc = AclActivationDescriptor; +class Activation : public Operator +{ +public: + Activation(Context &ctx, + const TensorDescriptor &src, + const TensorDescriptor &dst, + const ActivationDesc &desc, + StatusCode *status = nullptr) + { + AclOperator op; + const auto st = detail::as_enum<StatusCode>(AclActivation(&op, ctx.get(), src.get(), dst.get(), desc)); + reset(op); + report_status(st, "[Compute Library] Failure during Activation operator creation"); + if (status) { *status = st; } |