/* * Copyright (c) 2021 Arm Limited. * * SPDX-License-Identifier: MIT * * Permission is hereby granted, free of charge, to any person obtaining a copy * of this software and associated documentation files (the "Software"), to * deal in the Software without restriction, including without limitation the * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or * sell copies of the Software, and to permit persons to whom the Software is * furnished to do so, subject to the following conditions: * * The above copyright notice and this permission notice shall be included in all * copies or substantial portions of the Software. * * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. 
*/ #ifndef ARM_COMPUTE_ACL_HPP_ #define ARM_COMPUTE_ACL_HPP_ #include "arm_compute/Acl.h" #include #include #include #include #if defined(ARM_COMPUTE_EXCEPTIONS_ENABLED) #include #endif /* defined(ARM_COMPUTE_EXCEPTIONS_ENABLED) */ // Helper Macros #define ARM_COMPUTE_IGNORE_UNUSED(x) (void)(x) namespace acl { // Forward declarations class Context; class Queue; class Tensor; class TensorPack; /**< Status code enum */ enum class StatusCode { Success = AclSuccess, RuntimeError = AclRuntimeError, OutOfMemory = AclOutOfMemory, Unimplemented = AclUnimplemented, UnsupportedTarget = AclUnsupportedTarget, InvalidArgument = AclInvalidArgument, InvalidTarget = AclInvalidTarget, UnsupportedConfig = AclUnsupportedConfig, InvalidObjectState = AclInvalidObjectState, }; /**< Utility namespace containing helpers functions */ namespace detail { /** Construct to handle destruction of objects * * @tparam T Object base type */ template struct ObjectDeleter { }; #define OBJECT_DELETER(obj, func) \ template <> \ struct ObjectDeleter \ \ { \ static inline AclStatus Destroy(obj v) \ { \ return func(v); \ } \ }; OBJECT_DELETER(AclContext, AclDestroyContext) OBJECT_DELETER(AclQueue, AclDestroyQueue) OBJECT_DELETER(AclTensor, AclDestroyTensor) OBJECT_DELETER(AclTensorPack, AclDestroyTensorPack) OBJECT_DELETER(AclOperator, AclDestroyOperator) #undef OBJECT_DELETER /** Convert a strongly typed enum to an old plain c enum * * @tparam E Plain old C enum * @tparam SE Strongly typed resulting enum * * @param[in] v Value to convert * * @return A corresponding plain old C enumeration */ template constexpr E as_cenum(SE v) noexcept { return static_cast(static_cast::type>(v)); } /** Convert plain old enumeration to a strongly typed enum * * @tparam SE Strongly typed resulting enum * @tparam E Plain old C enum * * @param[in] val Value to convert * * @return A corresponding strongly typed enumeration */ template constexpr SE as_enum(E val) noexcept { return static_cast(val); } /** Object base class for 
library objects * * Class is defining basic common interface for all the library objects * * @tparam T Object type to be templated on */ template class ObjectBase { public: /** Destructor */ ~ObjectBase() = default; /** Copy constructor */ ObjectBase(const ObjectBase &) = default; /** Move Constructor */ ObjectBase(ObjectBase &&) = default; /** Copy assignment operator */ ObjectBase &operator=(const ObjectBase &) = default; /** Move assignment operator */ ObjectBase &operator=(ObjectBase &&) = default; /** Reset object value * * @param [in] val Value to set */ void reset(T *val) { _object.reset(val, detail::ObjectDeleter::Destroy); } /** Access uderlying object * * @return Underlying object */ const T *get() const { return _object.get(); } /** Access uderlying object * * @return Underlying object */ T *get() { return _object.get(); } protected: /** Constructor */ ObjectBase() = default; protected: std::shared_ptr _object{nullptr}; /**< Library object */ }; /** Equality operator for library object * * @tparam T Parameter to template on * * @param[in] lhs Left hand-side argument * @param[in] rhs Right hand-side argument * * @return True if objects are equal, else false */ template bool operator==(const ObjectBase &lhs, const ObjectBase &rhs) { return lhs.get() == rhs.get(); } /** Inequality operator for library object * * @tparam T Parameter to template on * * @param[in] lhs Left hand-side argument * @param[in] rhs Right hand-side argument * * @return True if objects are equal, else false */ template bool operator!=(const ObjectBase &lhs, const ObjectBase &rhs) { return !(lhs == rhs); } } // namespace detail #if defined(ARM_COMPUTE_EXCEPTIONS_ENABLED) /** Status class * * Class is an extension of std::exception and contains the underlying * status construct and an error explanatory message to be reported. 
* * @note Class is visible only when exceptions are enabled during compilation */ class Status : public std::exception { public: /** Constructor * * @param[in] status Status returned * @param[in] msg Error message to be bound with the exception */ Status(StatusCode status, const std::string &msg) : _status(status), _msg(msg) { } /** Returns an explanatory exception message * * @return Status message */ const char *what() const noexcept override { return _msg.c_str(); } /** Underlying status accessor * * @return Status code */ StatusCode status() const { return _status; } /** Explicit status converter * * @return Status code */ explicit operator StatusCode() const { return _status; } private: StatusCode _status; /**< Status code */ std::string _msg; /**< Status message */ }; /** Reports an error status and throws an exception object in case of failure * * @note This implementation is used when exceptions are enabled during compilation * * @param[in] status Status to report * @param[in] msg Explanatory error messaged * * @return Status code */ static inline void report_status(StatusCode status, const std::string &msg) { if (status != StatusCode::Success) { throw Status(status, msg); } } #else /* defined(ARM_COMPUTE_EXCEPTIONS_ENABLED) */ /** Reports a status code * * @note This implementation is used when exceptions are disabled during compilation * @note Message is surpressed and not reported in this case * * @param[in] status Status to report * @param[in] msg Explanatory error messaged * * @return Status code */ static inline void report_status(StatusCode status, const std::string &msg) { ARM_COMPUTE_IGNORE_UNUSED(status); ARM_COMPUTE_IGNORE_UNUSED(msg); } #endif /* defined(ARM_COMPUTE_EXCEPTIONS_ENABLED) */ /**< Target enum */ enum class Target { Cpu = AclCpu, /**< Cpu target that leverages SIMD */ GpuOcl = AclGpuOcl /**< Gpu target that leverages OpenCL */ }; /**< Available execution modes */ enum class ExecutionMode { FastRerun = AclPreferFastRerun, /**< Prefer 
minimum latency in consecutive runs, might introduce higher startup times */ FastStart = AclPreferFastStart, /**< Prefer minimizing startup time */ }; /** Context class * * Context acts as a central aggregate service for further objects created from it. * It provides, internally, common facilities in order to avoid the use of global * statically initialized objects that can lead to important side-effect under * specific execution contexts. * * For example context contains allocators for object creation, for further backing memory allocation, * any serialization interfaces and other modules that affect the construction of objects, * like program caches for OpenCL. */ class Context : public detail::ObjectBase { public: /**< Context options */ struct Options { static constexpr int32_t num_threads_auto = -1; /**< Allow runtime to specify number of threads */ /** Default Constructor * * @note By default no precision loss is enabled for operators * @note By default the preferred execution mode is to favor multiple consecutive reruns of an operator */ Options() : Options(ExecutionMode::FastRerun /* mode */, AclCpuCapabilitiesAuto /* caps */, false /* enable_fast_math */, nullptr /* kernel_config */, num_threads_auto /* max_compute_units */, nullptr /* allocator */) { } /** Constructor * * @param[in] mode Execution mode to be used * @param[in] caps Capabilities to be used * @param[in] enable_fast_math Allow precision loss in favor of performance * @param[in] kernel_config Kernel configuration file containing construction tuning meta-data * @param[in] max_compute_units Max compute units that are expected to used * @param[in] allocator Allocator to be used for internal memory allocation */ Options(ExecutionMode mode, AclTargetCapabilities caps, bool enable_fast_math, const char *kernel_config, int32_t max_compute_units, AclAllocator *allocator) { copts.mode = detail::as_cenum(mode); copts.capabilities = caps; copts.enable_fast_math = enable_fast_math; 
copts.kernel_config_file = kernel_config; copts.max_compute_units = max_compute_units; copts.allocator = allocator; } AclContextOptions copts{}; }; public: /** Constructor * * @note Serves as a simpler delegate constructor * @note As context options, default conservative options will be used * * @param[in] target Target to create context for * @param[out] status Status information if requested */ explicit Context(Target target, StatusCode *status = nullptr) : Context(target, Options(), status) { } /** Constructor * * @param[in] target Target to create context for * @param[in] options Context construction options * @param[out] status Status information if requested */ Context(Target target, const Options &options, StatusCode *status = nullptr) { AclContext ctx; const auto st = detail::as_enum(AclCreateContext(&ctx, detail::as_cenum(target), &options.copts)); reset(ctx); report_status(st, "[Compute Library] Failed to create context"); if (status) { *status = st; } } }; /**< Available tuning modes */ enum class TuningMode { Rapid = AclRapid, Normal = AclNormal, Exhaustive = AclExhaustive }; /** Queue class * * Queue is responsible for the execution related aspects, with main responsibilities those of * scheduling and tuning operators. * * Multiple queues can be created from the same context, and the same operator can be scheduled on each concurrently. * * @note An operator might depend on the maximum possible compute units that are provided in the context, * thus in cases where the number of the scheduling units of the queue are greater might lead to errors. 
*/ class Queue : public detail::ObjectBase { public: /**< Queue options */ struct Options { /** Default Constructor * * As default options, no tuning will be performed, and the number of scheduling units will * depends on internal device discovery functionality */ Options() : opts{AclTuningModeNone, 0} {}; /** Constructor * * @param[in] mode Tuning mode to be used * @param[in] compute_units Number of scheduling units to be used */ Options(TuningMode mode, int32_t compute_units) : opts{detail::as_cenum(mode), compute_units} { } AclQueueOptions opts; }; public: /** Constructor * * @note Serves as a simpler delegate constructor * @note As queue options, default conservative options will be used * * @param[in] ctx Context to create queue for * @param[out] status Status information if requested */ explicit Queue(Context &ctx, StatusCode *status = nullptr) : Queue(ctx, Options(), status) { } /** Constructor * * @note As queue options, default conservative options will be used * * @param[in] ctx Context from where the queue will be created from * @param[in] options Queue options to be used * @param[out] status Status information if requested */ explicit Queue(Context &ctx, const Options &options = Options(), StatusCode *status = nullptr) { AclQueue queue; const auto st = detail::as_enum(AclCreateQueue(&queue, ctx.get(), &options.opts)); reset(queue); report_status(st, "[Compute Library] Failed to create queue!"); if (status) { *status = st; } } /** Block until all the tasks of the queue have been marked as finished * * @return Status code */ StatusCode finish() { return detail::as_enum(AclQueueFinish(_object.get())); } }; /**< Data type enumeration */ enum class DataType { Unknown = AclDataTypeUnknown, UInt8 = AclUInt8, Int8 = AclInt8, UInt16 = AclUInt16, Int16 = AclInt16, UInt32 = AclUint32, Int32 = AclInt32, Float16 = AclFloat16, BFloat16 = AclBFloat16, Float32 = AclFloat32, }; /** Tensor Descriptor class * * Structure that contains all the required meta-data to 
represent a tensor
 *
 * The object owns its shape: the embedded C descriptor always points at the
 * shape storage of the same object.
 */
class TensorDescriptor
{
public:
    /** Constructor
     *
     * Strides are left unset (nullptr) and the byte offset is zero.
     *
     * @param[in] shape     Shape of the tensor
     * @param[in] data_type Data type of the tensor
     */
    TensorDescriptor(const std::vector<int32_t> &shape, DataType data_type) : _shape(shape), _data_type(data_type)
    {
        _cdesc.ndims     = static_cast<int32_t>(_shape.size());
        _cdesc.shape     = _shape.data();
        _cdesc.data_type = detail::as_cenum<AclDataType>(_data_type);
        _cdesc.strides   = nullptr;
        _cdesc.boffset   = 0;
    }
    /** Constructor
     *
     * The shape values are copied, so the resulting object does not refer to the
     * shape storage of @p desc afterwards.
     *
     * @note The strides pointer of @p desc, if any, is copied as-is and is not owned
     *
     * @param[in] desc C-type descriptor
     */
    explicit TensorDescriptor(const AclTensorDescriptor &desc)
    {
        _cdesc     = desc;
        _data_type = detail::as_enum<DataType>(desc.data_type);
        // Guard against a malformed descriptor (null shape or negative rank)
        if (desc.shape != nullptr && desc.ndims > 0)
        {
            _shape.assign(desc.shape, desc.shape + desc.ndims);
        }
        // Re-point the C view at the storage owned by this object
        _cdesc.ndims = static_cast<int32_t>(_shape.size());
        _cdesc.shape = _shape.data();
    }
    /** Copy constructor
     *
     * A member-wise copy would leave the C descriptor pointing into the shape storage
     * of @p other, hence the pointer is rebound to this object's own storage.
     *
     * @param[in] other Descriptor to copy
     */
    TensorDescriptor(const TensorDescriptor &other)
        : _shape(other._shape), _data_type(other._data_type), _cdesc(other._cdesc)
    {
        _cdesc.shape = _shape.data();
    }
    /** Copy assignment operator
     *
     * @param[in] other Descriptor to copy
     *
     * @return Reference to this object
     */
    TensorDescriptor &operator=(const TensorDescriptor &other)
    {
        if (this != &other)
        {
            _shape       = other._shape;
            _data_type   = other._data_type;
            _cdesc       = other._cdesc;
            _cdesc.shape = _shape.data();
        }
        return *this;
    }
    /** Get underlying C tensor descriptor
     *
     * @return Underlying structure; valid for as long as this object is alive
     */
    const AclTensorDescriptor *get() const
    {
        return &_cdesc;
    }
    /** Operator to compare two TensorDescriptor
     *
     * @param[in] other The instance to compare against
     *
     * @return True if two instances have the same shape and data type
     */
    bool operator==(const TensorDescriptor &other) const
    {
        return _data_type == other._data_type && _shape == other._shape;
    }

private:
    std::vector<int32_t> _shape{};     /**< Owned shape values */
    DataType             _data_type{}; /**< Data type of the tensor */
    AclTensorDescriptor  _cdesc{};     /**< C descriptor whose shape points into _shape */
};

/** Import memory types */
enum class ImportType
{
    Host = AclImportMemoryType::AclHostPtr
};

/** Tensor class
 *
 * Tensor is a mathematical construct that can represent an N-Dimensional space.
*
 * @note Maximum dimensionality support is 6 internally at the moment
 */
class Tensor : public detail::ObjectBase<AclTensor_>
{
public:
    /** Constructor
     *
     * @note Tensor memory is allocated
     *
     * @param[in]  ctx    Context from where the tensor will be created from
     * @param[in]  desc   Tensor descriptor to be used
     * @param[out] status Status information if requested
     */
    Tensor(Context &ctx, const TensorDescriptor &desc, StatusCode *status = nullptr) : Tensor(ctx, desc, true, status)
    {
    }
    /** Constructor
     *
     * @param[in]  ctx      Context from where the tensor will be created from
     * @param[in]  desc     Tensor descriptor to be used
     * @param[in]  allocate Flag to indicate if the tensor needs to be allocated
     * @param[out] status   Status information if requested
     */
    Tensor(Context &ctx, const TensorDescriptor &desc, bool allocate, StatusCode *status)
    {
        // Null-initialise so a failed creation never passes an indeterminate handle to reset().
        // NOTE(review): the deleter will still be called with the stored handle, even if
        // null; presumably the C API rejects a null handle gracefully - confirm.
        AclTensor  tensor = nullptr;
        const auto st     = detail::as_enum<StatusCode>(AclCreateTensor(&tensor, ctx.get(), desc.get(), allocate));
        reset(tensor);
        report_status(st, "[Compute Library] Failed to create tensor!");
        if (status)
        {
            *status = st;
        }
    }
    /** Maps the backing memory of a given tensor that can be used by the host to access any contents
     *
     * @return The handle produced by the C API; nullptr if the call did not provide one
     */
    void *map()
    {
        void      *handle = nullptr;
        const auto st     = detail::as_enum<StatusCode>(AclMapTensor(_object.get(), &handle));
        report_status(st, "[Compute Library] Failed to map the tensor and extract the tensor's backing memory!");
        return handle;
    }
    /** Unmaps tensor's memory
     *
     * @param[in] handle Handle to unmap
     *
     * @return Status code
     */
    StatusCode unmap(void *handle)
    {
        const auto st = detail::as_enum<StatusCode>(AclUnmapTensor(_object.get(), handle));
        report_status(st, "[Compute Library] Failed to unmap the tensor!");
        return st;
    }
    /** Import external memory to a given tensor object
     *
     * @param[in] handle External memory handle
     * @param[in] type   Type of memory to be imported
     *
     * @return Status code
     */
    StatusCode import(void *handle, ImportType type)
    {
        const auto st = detail::as_enum<StatusCode>(
            AclTensorImport(_object.get(), handle, detail::as_cenum<AclImportMemoryType>(type)));
        report_status(st, "[Compute Library] Failed to import external memory to tensor!");
        return st;
    }
    /** Get the size of the tensor in byte
     *
     * @note The size isn't based on allocated memory, but based on information in its descriptor (dimensions, data type, etc.).
     *
     * @return The size of the tensor in byte; 0 if the C API did not write a size
     */
    uint64_t get_size()
    {
        uint64_t   size{ 0 };
        const auto st = detail::as_enum<StatusCode>(AclGetTensorSize(_object.get(), &size));
        report_status(st, "[Compute Library] Failed to get the size of the tensor");
        return size;
    }
    /** Get the descriptor of this tensor
     *
     * @return The descriptor describing the characteristics of this tensor
     */
    TensorDescriptor get_descriptor()
    {
        // Value-initialise: when exceptions are disabled a failed query falls through to
        // the return below, which must not read indeterminate rank/shape fields.
        AclTensorDescriptor desc{};
        const auto          st = detail::as_enum<StatusCode>(AclGetTensorDescriptor(_object.get(), &desc));
        report_status(st, "[Compute Library] Failed to get the descriptor of the tensor");
        return TensorDescriptor(desc);
    }
};

/** Tensor pack class
 *
 * Pack is a utility construct that is used to create a collection of tensors that can then
 * be passed into operator as inputs.
*/ class TensorPack : public detail::ObjectBase { public: /** Pack pair construct */ struct PackPair { /** Constructor * * @param[in] tensor_ Tensor to pack * @param[in] slot_id_ Slot identification of the tensor in respect with the operator */ PackPair(Tensor *tensor_, int32_t slot_id_) : tensor(tensor_), slot_id(slot_id_) { } Tensor *tensor{nullptr}; /**< Tensor object */ int32_t slot_id{AclSlotUnknown}; /**< Slot id in respect with the operator */ }; public: /** Constructor * * @param[in] ctx Context from where the tensor pack will be created from * @param[out] status Status information if requested */ explicit TensorPack(Context &ctx, StatusCode *status = nullptr) { AclTensorPack pack; const auto st = detail::as_enum(AclCreateTensorPack(&pack, ctx.get())); reset(pack); report_status(st, "[Compute Library] Failure during tensor pack creation"); if (status) { *status = st; } } /** Add tensor to tensor pack * * @param[in] slot_id Slot id of the tensor in respect with the operator * @param[in] tensor Tensor to be added in the pack * * @return Status code */ StatusCode add(Tensor &tensor, int32_t slot_id) { return detail::as_enum(AclPackTensor(_object.get(), tensor.get(), slot_id)); } /** Add a list of tensors to a tensor pack * * @param[in] packed Pair packs to be added * * @return Status code */ StatusCode add(std::initializer_list packed) { const size_t size = packed.size(); std::vector slots(size); std::vector tensors(size); int i = 0; for (auto &p : packed) { slots[i] = p.slot_id; tensors[i] = AclTensor(p.tensor); ++i; } return detail::as_enum(AclPackTensors(_object.get(), tensors.data(), slots.data(), size)); } }; /** Operator class * * Operators are the basic algorithmic blocks responsible for performing distinct operations */ class Operator : public detail::ObjectBase { public: /** Run an operator on a given input list * * @param[in,out] queue Queue to scheduler the operator on * @param pack Tensor list to be used as input * * @return Status Code */ 
StatusCode run(Queue &queue, TensorPack &pack) { return detail::as_cenum(AclRunOperator(_object.get(), queue.get(), pack.get())); } protected: /** Constructor */ Operator() = default; }; /// Operators using ActivationDesc = AclActivationDescriptor; class Activation : public Operator { public: Activation(Context &ctx, const TensorDescriptor &src, const TensorDescriptor &dst, const ActivationDesc &desc, StatusCode *status = nullptr) { AclOperator op; const auto st = detail::as_enum(AclActivation(&op, ctx.get(), src.get(), dst.get(), desc)); reset(op); report_status(st, "[Compute Library] Failure during Activation operator creation"); if (status) { *status = st; } } }; } // namespace acl #undef ARM_COMPUTE_IGNORE_UNUSED #endif /* ARM_COMPUTE_ACL_HPP_ */