diff options
Diffstat (limited to 'arm_compute/Acl.hpp')
-rw-r--r-- | arm_compute/Acl.hpp | 796 |
1 files changed, 796 insertions, 0 deletions
diff --git a/arm_compute/Acl.hpp b/arm_compute/Acl.hpp new file mode 100644 index 0000000000..6a9d585c14 --- /dev/null +++ b/arm_compute/Acl.hpp @@ -0,0 +1,796 @@ +/* + * Copyright (c) 2021 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_ACL_HPP_ +#define ARM_COMPUTE_ACL_HPP_ + +#include "arm_compute/Acl.h" + +#include <cstdlib> +#include <memory> +#include <string> +#include <vector> + +#if defined(ARM_COMPUTE_EXCEPTIONS_ENABLED) +#include <exception> +#endif /* defined(ARM_COMPUTE_EXCEPTIONS_ENABLED) */ + +// Helper Macros +#define ARM_COMPUTE_IGNORE_UNUSED(x) (void)(x) + +namespace acl +{ +// Forward declarations +class Context; +class Queue; +class Tensor; +class TensorPack; + +/**< Status code enum */ +enum class StatusCode +{ + Success = AclSuccess, + RuntimeError = AclRuntimeError, + OutOfMemory = AclOutOfMemory, + Unimplemented = AclUnimplemented, + UnsupportedTarget = AclUnsupportedTarget, + InvalidArgument = AclInvalidArgument, + InvalidTarget = AclInvalidTarget, + UnsupportedConfig = AclUnsupportedConfig, + InvalidObjectState = AclInvalidObjectState, +}; + +/**< Utility namespace containing helpers functions */ +namespace detail +{ +/** Construct to handle destruction of objects + * + * @tparam T Object base type + */ +template <typename T> +struct ObjectDeleter +{ +}; + +#define OBJECT_DELETER(obj, func) \ + template <> \ + struct ObjectDeleter<obj> \ + \ + { \ + static inline AclStatus Destroy(obj v) \ + { \ + return func(v); \ + } \ + }; + +OBJECT_DELETER(AclContext, AclDestroyContext) +OBJECT_DELETER(AclQueue, AclDestroyQueue) +OBJECT_DELETER(AclTensor, AclDestroyTensor) +OBJECT_DELETER(AclTensorPack, AclDestroyTensorPack) +OBJECT_DELETER(AclOperator, AclDestroyOperator) + +#undef OBJECT_DELETER + +/** Convert a strongly typed enum to an old plain c enum + * + * @tparam E Plain old C enum + * @tparam SE Strongly typed resulting enum + * + * @param[in] v Value to convert + * + * @return A corresponding plain old C enumeration + */ +template <typename E, typename SE> +constexpr E as_cenum(SE v) noexcept +{ + return static_cast<E>(static_cast<typename std::underlying_type<SE>::type>(v)); +} + +/** Convert plain old enumeration to a strongly typed enum + * + * @tparam SE Strongly typed resulting enum + * @tparam E Plain old C enum + * + * @param[in] val Value to convert + * + * @return A corresponding strongly typed enumeration + */ +template <typename SE, typename E> +constexpr SE as_enum(E val) noexcept +{ + return static_cast<SE>(val); +} + +/** Object base class for library objects + * + * Class is defining basic common interface for all the library objects + * + * @tparam T Object type to be templated on + */ +template <typename T> +class ObjectBase +{ +public: + /** Destructor */ + ~ObjectBase() = default; + /** Copy constructor */ + ObjectBase(const ObjectBase<T> &) = default; + /** Move Constructor */ + ObjectBase(ObjectBase<T> &&) = default; + /** Copy assignment operator */ + ObjectBase<T> &operator=(const ObjectBase<T> &) = default; + /** Move assignment operator */ + ObjectBase<T> &operator=(ObjectBase<T> &&) = default; + /** Reset object value + * + * @param [in] val Value to set + */ + void reset(T *val) + { + _object.reset(val, detail::ObjectDeleter<T *>::Destroy); + } + /** Access uderlying object + * + * @return Underlying object + */ + const T *get() const + { + return _object.get(); + } + /** Access uderlying object + * + * @return Underlying object + */ + T *get() + { + return _object.get(); + } + +protected: + /** Constructor */ + ObjectBase() = default; + +protected: + std::shared_ptr<T> _object{nullptr}; /**< Library object */ +}; + +/** Equality operator for library object + * + * @tparam T Parameter to template on + * + * @param[in] lhs Left hand-side argument + * @param[in] rhs Right hand-side argument + * + * @return True if objects are equal, else false + */ +template <typename T> +bool operator==(const ObjectBase<T> &lhs, const ObjectBase<T> &rhs) +{ + return lhs.get() == rhs.get(); +} + +/** Inequality operator for library object + * + * @tparam T Parameter to template on + * + * @param[in] lhs Left hand-side argument + * @param[in] rhs Right hand-side argument + * + * @return True if objects are equal, else false + */ +template <typename T> +bool operator!=(const ObjectBase<T> &lhs, const ObjectBase<T> &rhs) +{ + return !(lhs == rhs); +} +} // namespace detail + +#if defined(ARM_COMPUTE_EXCEPTIONS_ENABLED) +/** Status class + * + * Class is an extension of std::exception and contains the underlying + * status construct and an error explanatory message to be reported. + * + * @note Class is visible only when exceptions are enabled during compilation + */ +class Status : public std::exception +{ +public: + /** Constructor + * + * @param[in] status Status returned + * @param[in] msg Error message to be bound with the exception + */ + Status(StatusCode status, const std::string &msg) : _status(status), _msg(msg) + { + } + /** Returns an explanatory exception message + * + * @return Status message + */ + const char *what() const noexcept override + { + return _msg.c_str(); + } + /** Underlying status accessor + * + * @return Status code + */ + StatusCode status() const + { + return _status; + } + /** Explicit status converter + * + * @return Status code + */ + explicit operator StatusCode() const + { + return _status; + } + +private: + StatusCode _status; /**< Status code */ + std::string _msg; /**< Status message */ +}; + +/** Reports an error status and throws an exception object in case of failure + * + * @note This implementation is used when exceptions are enabled during compilation + * + * @param[in] status Status to report + * @param[in] msg Explanatory error messaged + * + * @return Status code + */ +static inline void report_status(StatusCode status, const std::string &msg) +{ + if (status != StatusCode::Success) + { + throw Status(status, msg); + } +} +#else /* defined(ARM_COMPUTE_EXCEPTIONS_ENABLED) */ +/** Reports a status code + * + * @note This implementation is used when exceptions are disabled during compilation + * @note Message is surpressed and not reported in this case + * + * @param[in] status Status to report + * @param[in] msg Explanatory error messaged + * + * @return Status code + */ +static inline void report_status(StatusCode status, const std::string &msg) +{ + ARM_COMPUTE_IGNORE_UNUSED(status); + ARM_COMPUTE_IGNORE_UNUSED(msg); +} +#endif /* defined(ARM_COMPUTE_EXCEPTIONS_ENABLED) */ + +/**< Target enum */ +enum class Target +{ + Cpu = AclCpu, /**< Cpu target that leverages SIMD */ + GpuOcl = AclGpuOcl /**< Gpu target that leverages OpenCL */ +}; + +/**< Available execution modes */ +enum class ExecutionMode +{ + FastRerun = + AclPreferFastRerun, /**< Prefer minimum latency in consecutive runs, might introduce higher startup times */ + FastStart = AclPreferFastStart, /**< Prefer minimizing startup time */ +}; + +/** Context class + * + * Context acts as a central aggregate service for further objects created from it. + * It provides, internally, common facilities in order to avoid the use of global + * statically initialized objects that can lead to important side-effect under + * specific execution contexts. + * + * For example context contains allocators for object creation, for further backing memory allocation, + * any serialization interfaces and other modules that affect the construction of objects, + * like program caches for OpenCL. + */ +class Context : public detail::ObjectBase<AclContext_> +{ +public: + /**< Context options */ + struct Options + { + static constexpr int32_t num_threads_auto = -1; /**< Allow runtime to specify number of threads */ + + /** Default Constructor + * + * @note By default no precision loss is enabled for operators + * @note By default the preferred execution mode is to favor multiple consecutive reruns of an operator + */ + Options() + : Options(ExecutionMode::FastRerun /* mode */, + AclCpuCapabilitiesAuto /* caps */, + false /* enable_fast_math */, + nullptr /* kernel_config */, + num_threads_auto /* max_compute_units */, + nullptr /* allocator */) + { + } + /** Constructor + * + * @param[in] mode Execution mode to be used + * @param[in] caps Capabilities to be used + * @param[in] enable_fast_math Allow precision loss in favor of performance + * @param[in] kernel_config Kernel configuration file containing construction tuning meta-data + * @param[in] max_compute_units Max compute units that are expected to used + * @param[in] allocator Allocator to be used for internal memory allocation + */ + Options(ExecutionMode mode, + AclTargetCapabilities caps, + bool enable_fast_math, + const char *kernel_config, + int32_t max_compute_units, + AclAllocator *allocator) + { + copts.mode = detail::as_cenum<AclExecutionMode>(mode); + copts.capabilities = caps; + copts.enable_fast_math = enable_fast_math; + copts.kernel_config_file = kernel_config; + copts.max_compute_units = max_compute_units; + copts.allocator = allocator; + } + + AclContextOptions copts{}; + }; + +public: + /** Constructor + * + * @note Serves as a simpler delegate constructor + * @note As context options, default conservative options will be used + * + * @param[in] target Target to create context for + * @param[out] status Status information if requested + */ + explicit Context(Target target, StatusCode *status = nullptr) : Context(target, Options(), status) + { + } + /** Constructor + * + * @param[in] target Target to create context for + * @param[in] options Context construction options + * @param[out] status Status information if requested + */ + Context(Target target, const Options &options, StatusCode *status = nullptr) + { + AclContext ctx; + const auto st = + detail::as_enum<StatusCode>(AclCreateContext(&ctx, detail::as_cenum<AclTarget>(target), &options.copts)); + reset(ctx); + report_status(st, "[Compute Library] Failed to create context"); + if (status) + { + *status = st; + } + } +}; + +/**< Available tuning modes */ +enum class TuningMode +{ + Rapid = AclRapid, + Normal = AclNormal, + Exhaustive = AclExhaustive +}; + +/** Queue class + * + * Queue is responsible for the execution related aspects, with main responsibilities those of + * scheduling and tuning operators. + * + * Multiple queues can be created from the same context, and the same operator can be scheduled on each concurrently. + * + * @note An operator might depend on the maximum possible compute units that are provided in the context, + * thus in cases where the number of the scheduling units of the queue are greater might lead to errors. + */ +class Queue : public detail::ObjectBase<AclQueue_> +{ +public: + /**< Queue options */ + struct Options + { + /** Default Constructor + * + * As default options, no tuning will be performed, and the number of scheduling units will + * depends on internal device discovery functionality + */ + Options() : opts{AclTuningModeNone, 0} {}; + /** Constructor + * + * @param[in] mode Tuning mode to be used + * @param[in] compute_units Number of scheduling units to be used + */ + Options(TuningMode mode, int32_t compute_units) : opts{detail::as_cenum<AclTuningMode>(mode), compute_units} + { + } + + AclQueueOptions opts; + }; + +public: + /** Constructor + * + * @note Serves as a simpler delegate constructor + * @note As queue options, default conservative options will be used + * + * @param[in] ctx Context to create queue for + * @param[out] status Status information if requested + */ + explicit Queue(Context &ctx, StatusCode *status = nullptr) : Queue(ctx, Options(), status) + { + } + /** Constructor + * + * @note As queue options, default conservative options will be used + * + * @param[in] ctx Context from where the queue will be created from + * @param[in] options Queue options to be used + * @param[out] status Status information if requested + */ + explicit Queue(Context &ctx, const Options &options = Options(), StatusCode *status = nullptr) + { + AclQueue queue; + const auto st = detail::as_enum<StatusCode>(AclCreateQueue(&queue, ctx.get(), &options.opts)); + reset(queue); + report_status(st, "[Compute Library] Failed to create queue!"); + if (status) + { + *status = st; + } + } + /** Block until all the tasks of the queue have been marked as finished + * + * @return Status code + */ + StatusCode finish() + { + return detail::as_enum<StatusCode>(AclQueueFinish(_object.get())); + } +}; + +/**< Data type enumeration */ +enum class DataType +{ + Unknown = AclDataTypeUnknown, + UInt8 = AclUInt8, + Int8 = AclInt8, + UInt16 = AclUInt16, + Int16 = AclInt16, + UInt32 = AclUint32, + Int32 = AclInt32, + Float16 = AclFloat16, + BFloat16 = AclBFloat16, + Float32 = AclFloat32, +}; + +/** Tensor Descriptor class + * + * Structure that contains all the required meta-data to represent a tensor + */ +class TensorDescriptor +{ +public: + /** Constructor + * + * @param[in] shape Shape of the tensor + * @param[in] data_type Data type of the tensor + */ + TensorDescriptor(const std::vector<int32_t> &shape, DataType data_type) : _shape(shape), _data_type(data_type) + { + _cdesc.ndims = _shape.size(); + _cdesc.shape = _shape.data(); + _cdesc.data_type = detail::as_cenum<AclDataType>(_data_type); + _cdesc.strides = nullptr; + _cdesc.boffset = 0; + } + /** Constructor + * + * @param[in] desc C-type descriptor + */ + explicit TensorDescriptor(const AclTensorDescriptor &desc) + { + _cdesc = desc; + _data_type = detail::as_enum<DataType>(desc.data_type); + _shape.reserve(desc.ndims); + for (int32_t d = 0; d < desc.ndims; ++d) + { + _shape.emplace_back(desc.shape[d]); + } + } + /** Get underlying C tensor descriptor + * + * @return Underlying structure + */ + const AclTensorDescriptor *get() const + { + return &_cdesc; + } + /** Operator to compare two TensorDescriptor + * + * @param[in] other The instance to compare against + * + * @return True if two instances have the same shape and data type + */ + bool operator==(const TensorDescriptor &other) + { + bool is_same = true; + + is_same &= _data_type == other._data_type; + is_same &= _shape.size() == other._shape.size(); + + if (is_same) + { + for (uint32_t d = 0; d < _shape.size(); ++d) + { + is_same &= _shape[d] == other._shape[d]; + } + } + + return is_same; + } + +private: + std::vector<int32_t> _shape{}; + DataType _data_type{}; + AclTensorDescriptor _cdesc{}; +}; + +/** Import memory types */ +enum class ImportType +{ + Host = AclImportMemoryType::AclHostPtr +}; + +/** Tensor class + * + * Tensor is an mathematical construct that can represent an N-Dimensional space. + * + * @note Maximum dimensionality support is 6 internally at the moment + */ +class Tensor : public detail::ObjectBase<AclTensor_> +{ +public: + /** Constructor + * + * @note Tensor memory is allocated + * + * @param[in] ctx Context from where the tensor will be created from + * @param[in] desc Tensor descriptor to be used + * @param[out] status Status information if requested + */ + Tensor(Context &ctx, const TensorDescriptor &desc, StatusCode *status = nullptr) : Tensor(ctx, desc, true, status) + { + } + /** Constructor + * + * @param[in] ctx Context from where the tensor will be created from + * @param[in] desc Tensor descriptor to be used + * @param[in] allocate Flag to indicate if the tensor needs to be allocated + * @param[out] status Status information if requested + */ + Tensor(Context &ctx, const TensorDescriptor &desc, bool allocate, StatusCode *status) + { + AclTensor tensor; + const auto st = detail::as_enum<StatusCode>(AclCreateTensor(&tensor, ctx.get(), desc.get(), allocate)); + reset(tensor); + report_status(st, "[Compute Library] Failed to create tensor!"); + if (status) + { + *status = st; + } + } + /** Maps the backing memory of a given tensor that can be used by the host to access any contents + * + * @return A valid non-zero pointer in case of success else nullptr + */ + void *map() + { + void *handle = nullptr; + const auto st = detail::as_enum<StatusCode>(AclMapTensor(_object.get(), &handle)); + report_status(st, "[Compute Library] Failed to map the tensor and extract the tensor's backing memory!"); + return handle; + } + /** Unmaps tensor's memory + * + * @param[in] handle Handle to unmap + * + * @return Status code + */ + StatusCode unmap(void *handle) + { + const auto st = detail::as_enum<StatusCode>(AclUnmapTensor(_object.get(), handle)); + report_status(st, "[Compute Library] Failed to unmap the tensor!"); + return st; + } + /** Import external memory to a given tensor object + * + * @param[in] handle External memory handle + * @param[in] type Type of memory to be imported + * + * @return Status code + */ + StatusCode import(void *handle, ImportType type) + { + const auto st = detail::as_enum<StatusCode>( + AclTensorImport(_object.get(), handle, detail::as_cenum<AclImportMemoryType>(type))); + report_status(st, "[Compute Library] Failed to import external memory to tensor!"); + return st; + } + /** Get the size of the tensor in byte + * + * @note The size isn't based on allocated memory, but based on information in its descriptor (dimensions, data type, etc.). + * + * @return The size of the tensor in byte + */ + uint64_t get_size() + { + uint64_t size{0}; + const auto st = detail::as_enum<StatusCode>(AclGetTensorSize(_object.get(), &size)); + report_status(st, "[Compute Library] Failed to get the size of the tensor"); + return size; + } + /** Get the descriptor of this tensor + * + * @return The descriptor describing the characteristics of this tensor + */ + TensorDescriptor get_descriptor() + { + AclTensorDescriptor desc; + const auto st = detail::as_enum<StatusCode>(AclGetTensorDescriptor(_object.get(), &desc)); + report_status(st, "[Compute Library] Failed to get the descriptor of the tensor"); + return TensorDescriptor(desc); + } +}; + +/** Tensor pack class + * + * Pack is a utility construct that is used to create a collection of tensors that can then + * be passed into operator as inputs. + */ +class TensorPack : public detail::ObjectBase<AclTensorPack_> +{ +public: + /** Pack pair construct */ + struct PackPair + { + /** Constructor + * + * @param[in] tensor_ Tensor to pack + * @param[in] slot_id_ Slot identification of the tensor in respect with the operator + */ + PackPair(Tensor *tensor_, int32_t slot_id_) : tensor(tensor_), slot_id(slot_id_) + { + } + + Tensor *tensor{nullptr}; /**< Tensor object */ + int32_t slot_id{AclSlotUnknown}; /**< Slot id in respect with the operator */ + }; + +public: + /** Constructor + * + * @param[in] ctx Context from where the tensor pack will be created from + * @param[out] status Status information if requested + */ + explicit TensorPack(Context &ctx, StatusCode *status = nullptr) + { + AclTensorPack pack; + const auto st = detail::as_enum<StatusCode>(AclCreateTensorPack(&pack, ctx.get())); + reset(pack); + report_status(st, "[Compute Library] Failure during tensor pack creation"); + if (status) + { + *status = st; + } + } + /** Add tensor to tensor pack + * + * @param[in] slot_id Slot id of the tensor in respect with the operator + * @param[in] tensor Tensor to be added in the pack + * + * @return Status code + */ + StatusCode add(Tensor &tensor, int32_t slot_id) + { + return detail::as_enum<StatusCode>(AclPackTensor(_object.get(), tensor.get(), slot_id)); + } + /** Add a list of tensors to a tensor pack + * + * @param[in] packed Pair packs to be added + * + * @return Status code + */ + StatusCode add(std::initializer_list<PackPair> packed) + { + const size_t size = packed.size(); + std::vector<int32_t> slots(size); + std::vector<AclTensor> tensors(size); + int i = 0; + for (auto &p : packed) + { + slots[i] = p.slot_id; + tensors[i] = AclTensor(p.tensor); + ++i; + } + return detail::as_enum<StatusCode>(AclPackTensors(_object.get(), tensors.data(), slots.data(), size)); + } +}; + +/** Operator class + * + * Operators are the basic algorithmic blocks responsible for performing distinct operations + */ +class Operator : public detail::ObjectBase<AclOperator_> +{ +public: + /** Run an operator on a given input list + * + * @param[in,out] queue Queue to scheduler the operator on + * @param pack Tensor list to be used as input + * + * @return Status Code + */ + StatusCode run(Queue &queue, TensorPack &pack) + { + return detail::as_cenum<StatusCode>(AclRunOperator(_object.get(), queue.get(), pack.get())); + } + +protected: + /** Constructor */ + Operator() = default; +}; + +/// Operators +using ActivationDesc = AclActivationDescriptor; +class Activation : public Operator +{ +public: + Activation(Context &ctx, + const TensorDescriptor &src, + const TensorDescriptor &dst, + const ActivationDesc &desc, + StatusCode *status = nullptr) + { + AclOperator op; + const auto st = detail::as_enum<StatusCode>(AclActivation(&op, ctx.get(), src.get(), dst.get(), desc)); + reset(op); + report_status(st, "[Compute Library] Failure during Activation operator creation"); + if (status) + { + *status = st; + } + } +}; +} // namespace acl +#undef ARM_COMPUTE_IGNORE_UNUSED +#endif /* ARM_COMPUTE_ACL_HPP_ */ |