From 3f26ef4f9a2d447adb324dd69aec7c49cf7905fc Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Tue, 23 Feb 2021 10:01:33 +0000 Subject: Add tensor related data structures for the new API Adds the following: - TensorDescriptor: which is responsible for holding the information needed to represent a tensor (e.g. shape, dimensions, etc) - Tensor: an aggreate object of a descriptor and a backing memory - TensorPack: A map of tensor that can be passed to operators as inputs/outputs Signed-off-by: Georgios Pinitas Change-Id: I02734ac6ad85700d91d6e73217b4637adbf5d177 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5260 Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins --- arm_compute/Acl.hpp | 252 ++++++++++++++++++++++++++++++-- arm_compute/AclEntrypoints.h | 127 ++++++++++++++++ arm_compute/AclOpenClExt.h | 9 ++ arm_compute/AclTypes.h | 61 ++++++-- arm_compute/core/utils/logging/Macros.h | 15 +- 5 files changed, 440 insertions(+), 24 deletions(-) (limited to 'arm_compute') diff --git a/arm_compute/Acl.hpp b/arm_compute/Acl.hpp index b74e65430c..a009894438 100644 --- a/arm_compute/Acl.hpp +++ b/arm_compute/Acl.hpp @@ -29,6 +29,7 @@ #include #include #include +#include #if defined(ARM_COMPUTE_EXCEPTIONS_ENABLED) #include @@ -41,6 +42,8 @@ namespace acl { // Forward declarations class Context; +class Tensor; +class TensorPack; /**< Status code enum */ enum class StatusCode @@ -80,6 +83,8 @@ struct ObjectDeleter }; OBJECT_DELETER(AclContext, AclDestroyContext) +OBJECT_DELETER(AclTensor, AclDestroyTensor) +OBJECT_DELETER(AclTensorPack, AclDestroyTensorPack) #undef OBJECT_DELETER @@ -256,13 +261,12 @@ private: * * @return Status code */ -static inline StatusCode report_status(StatusCode status, const std::string &msg) +static inline void report_status(StatusCode status, const std::string &msg) { if(status != StatusCode::Success) { throw Status(status, msg); } - return status; } #else /* defined(ARM_COMPUTE_EXCEPTIONS_ENABLED) */ /** Reports a status code @@ -275,10 +279,10 @@ static inline StatusCode report_status(StatusCode status, const std::string &msg * * @return Status code */ -static inline StatusCode report_status(StatusCode status, const std::string &msg) +static inline void report_status(StatusCode status, const std::string &msg) { + ARM_COMPUTE_IGNORE_UNUSED(status); ARM_COMPUTE_IGNORE_UNUSED(msg); - return status; } #endif /* defined(ARM_COMPUTE_EXCEPTIONS_ENABLED) */ @@ -313,12 +317,22 @@ public: /**< Context options */ struct Options { + static constexpr int32_t num_threads_auto = -1; /**< Allow runtime to specify number of threads */ + /** Default Constructor * * @note By default no precision loss is enabled for operators * @note By default the preferred execution mode is to favor multiple consecutive reruns of an operator */ - Options() = default; + Options() + : Options(ExecutionMode::FastRerun /* mode */, + AclCpuCapabilitiesAuto /* caps */, + false /* enable_fast_math */, + nullptr /* kernel_config */, + num_threads_auto /* max_compute_units */, + nullptr /* allocator */) + { + } /** Constructor * * @param[in] mode Execution mode to be used @@ -335,14 +349,15 @@ public: int32_t max_compute_units, AclAllocator *allocator) { - opts.mode = detail::as_cenum(mode); - opts.capabilities = caps; - opts.enable_fast_math = enable_fast_math; - opts.kernel_config_file = kernel_config; - opts.max_compute_units = max_compute_units; - opts.allocator = allocator; + copts.mode = detail::as_cenum(mode); + copts.capabilities = caps; + copts.enable_fast_math = enable_fast_math; + copts.kernel_config_file = kernel_config; + copts.max_compute_units = max_compute_units; + copts.allocator = allocator; } - AclContextOptions opts{ acl_default_ctx_options }; + + AclContextOptions copts{}; }; public: @@ -367,14 +382,223 @@ public: Context(Target target, const Options &options, StatusCode *status = nullptr) { AclContext ctx; - const auto st = detail::as_enum(AclCreateContext(&ctx, detail::as_cenum(target), &options.opts)); + const auto st = detail::as_enum(AclCreateContext(&ctx, detail::as_cenum(target), &options.copts)); reset(ctx); - report_status(st, "Failure during context creation"); + report_status(st, "[Arm Compute Library] Failed to create context"); + if(status) + { + *status = st; + } + } +}; + +/**< Data type enumeration */ +enum class DataType +{ + Unknown = AclDataTypeUnknown, + UInt8 = AclUInt8, + Int8 = AclInt8, + UInt16 = AclUInt16, + Int16 = AclInt16, + UInt32 = AclUint32, + Int32 = AclInt32, + Float16 = AclFloat16, + BFloat16 = AclBFloat16, + Float32 = AclFloat32, +}; + +/** Tensor Descriptor class + * + * Structure that contains all the required meta-data to represent a tensor + */ +class TensorDescriptor +{ +public: + /** Constructor + * + * @param[in] shape Shape of the tensor + * @param[in] data_type Data type of the tensor + */ + TensorDescriptor(const std::vector &shape, DataType data_type) + : _shape(shape), _data_type(data_type) + { + _cdesc.ndims = _shape.size(); + _cdesc.shape = _shape.data(); + _cdesc.data_type = detail::as_cenum(_data_type); + _cdesc.strides = nullptr; + _cdesc.boffset = 0; + } + /** Get underlying C tensor descriptor + * + * @return Underlying structure + */ + const AclTensorDescriptor *get() const + { + return &_cdesc; + } + +private: + std::vector _shape{}; + DataType _data_type{}; + AclTensorDescriptor _cdesc{}; +}; + +/** Import memory types */ +enum class ImportType +{ + Host = AclImportMemoryType::AclHostPtr +}; + +/** Tensor class + * + * Tensor is an mathematical construct that can represent an N-Dimensional space. + * + * @note Maximum dimensionality support is 6 internally at the moment + */ +class Tensor : public detail::ObjectBase +{ +public: + /** Constructor + * + * @note Tensor memory is allocated + * + * @param[in] ctx Context from where the tensor will be created from + * @param[in] desc Tensor descriptor to be used + * @param[out] status Status information if requested + */ + Tensor(Context &ctx, const TensorDescriptor &desc, StatusCode *status = nullptr) + : Tensor(ctx, desc, true, status) + { + } + /** Constructor + * + * @param[in] ctx Context from where the tensor will be created from + * @param[in] desc Tensor descriptor to be used + * @param[in] allocate Flag to indicate if the tensor needs to be allocated + * @param[out] status Status information if requested + */ + Tensor(Context &ctx, const TensorDescriptor &desc, bool allocate, StatusCode *status) + { + AclTensor tensor; + const auto st = detail::as_enum(AclCreateTensor(&tensor, ctx.get(), desc.get(), allocate)); + reset(tensor); + report_status(st, "[Arm Compute Library] Failed to create tensor!"); if(status) { *status = st; } } + /** Maps the backing memory of a given tensor that can be used by the host to access any contents + * + * @return A valid non-zero pointer in case of success else nullptr + */ + void *map() + { + void *handle = nullptr; + const auto st = detail::as_enum(AclMapTensor(_object.get(), &handle)); + report_status(st, "[Arm Compute Library] Failed to map the tensor and extract the tensor's backing memory!"); + return handle; + } + /** Unmaps tensor's memory + * + * @param[in] handle Handle to unmap + * + * @return Status code + */ + StatusCode unmap(void *handle) + { + const auto st = detail::as_enum(AclUnmapTensor(_object.get(), handle)); + report_status(st, "[Arm Compute Library] Failed to unmap the tensor!"); + return st; + } + /** Import external memory to a given tensor object + * + * @param[in] handle External memory handle + * @param[in] type Type of memory to be imported + * + * @return Status code + */ + StatusCode import(void *handle, ImportType type) + { + const auto st = detail::as_enum(AclTensorImport(_object.get(), handle, detail::as_cenum(type))); + report_status(st, "[Arm Compute Library] Failed to import external memory to tensor!"); + return st; + } +}; + +/** Tensor pack class + * + * Pack is a utility construct that is used to create a collection of tensors that can then + * be passed into operator as inputs. + */ +class TensorPack : public detail::ObjectBase +{ +public: + /** Pack pair construct */ + struct PackPair + { + /** Constructor + * + * @param[in] tensor_ Tensor to pack + * @param[in] slot_id_ Slot identification of the tensor in respect with the operator + */ + PackPair(Tensor *tensor_, int32_t slot_id_) + : tensor(tensor_), slot_id(slot_id_) + { + } + + Tensor *tensor{ nullptr }; /**< Tensor object */ + int32_t slot_id{ AclSlotUnknown }; /**< Slot id in respect with the operator */ + }; + +public: + /** Constructor + * + * @param[in] ctx Context from where the tensor pack will be created from + * @param[out] status Status information if requested + */ + explicit TensorPack(Context &ctx, StatusCode *status = nullptr) + { + AclTensorPack pack; + const auto st = detail::as_enum(AclCreateTensorPack(&pack, ctx.get())); + reset(pack); + report_status(st, "[Arm Compute Library] Failure during tensor pack creation"); + if(status) + { + *status = st; + } + } + /** Add tensor to tensor pack + * + * @param[in] slot_id Slot id of the tensor in respect with the operator + * @param[in] tensor Tensor to be added in the pack + * + * @return Status code + */ + StatusCode add(Tensor &tensor, int32_t slot_id) + { + return detail::as_enum(AclPackTensor(_object.get(), tensor.get(), slot_id)); + } + /** Add a list of tensors to a tensor pack + * + * @param[in] packed Pair packs to be added + * + * @return Status code + */ + StatusCode add(std::initializer_list packed) + { + const size_t size = packed.size(); + std::vector slots(size); + std::vector tensors(size); + int i = 0; + for(auto &p : packed) + { + slots[i] = p.slot_id; + tensors[i] = AclTensor(p.tensor); + ++i; + } + return detail::as_enum(AclPackTensors(_object.get(), tensors.data(), slots.data(), size)); + } }; } // namespace acl #undef ARM_COMPUTE_IGNORE_UNUSED diff --git a/arm_compute/AclEntrypoints.h b/arm_compute/AclEntrypoints.h index 02e072f826..cd974341c2 100644 --- a/arm_compute/AclEntrypoints.h +++ b/arm_compute/AclEntrypoints.h @@ -62,6 +62,133 @@ AclStatus AclCreateContext(AclContext *ctx, */ AclStatus AclDestroyContext(AclContext ctx); +/** Create a Tensor object + * + * Tensor is a generalized matrix construct that can represent up to ND dimensionality (where N = 6 for Compute Library) + * The object holds a backing memory along-side to operate on + * + * @param[in, out] tensor A valid non-zero tensor object if no failures occur + * @param[in] ctx Context to be used + * @param[in] desc Tensor representation meta-data + * @param[in] allocate Instructs allocation of the tensor objects + * + * Returns: + * - @ref AclSuccess if function was completed successfully + * - @ref AclOutOfMemory if there was a failure allocating memory resources + * - @ref AclUnsupportedTarget if the requested target is unsupported + * - @ref AclInvalidArgument if a given argument is invalid + */ +AclStatus AclCreateTensor(AclTensor *tensor, AclContext ctx, const AclTensorDescriptor *desc, bool allocate); + +/** Map a tensor's backing memory to the host + * + * @param[in] tensor Tensor to be mapped + * @param[in, out] handle A handle to the underlying backing memory + * + * @return Status code + * + * Returns: + * - @ref AclSuccess if function was completed successfully + * - @ref AclInvalidArgument if a given argument is invalid + */ +AclStatus AclMapTensor(AclTensor tensor, void **handle); + +/** Unmap the tensor's backing memory + * + * @param[in] tensor tensor to unmap memory from + * @param[in] handle Backing memory to be unmapped + * + * @return Status code + * + * Returns: + * - @ref AclSuccess if function was completed successfully + * - @ref AclInvalidArgument if a given argument is invalid + */ +AclStatus AclUnmapTensor(AclTensor tensor, void *handle); + +/** Import external memory to a given tensor object + * + * @param[in, out] tensor Tensor to import memory to + * @param[in] handle Backing memory to be imported + * @param[in] type Type of the imported memory + * + * Returns: + * - @ref AclSuccess if function was completed successfully + * - @ref AclInvalidArgument if a given argument is invalid + */ +AclStatus AclTensorImport(AclTensor tensor, void *handle, AclImportMemoryType type); + +/** Destroy a given tensor object + * + * @param[in,out] tensor A valid tensor object to be destroyed + * + * @return Status code + * + * Returns: + * - @ref AclSuccess if function was completed successfully + * - @ref AclInvalidArgument if the provided tensor is invalid + */ +AclStatus AclDestroyTensor(AclTensor tensor); + +/** Creates a tensor pack + * + * Tensor packs are used to create a collection of tensors that can be passed around for operator execution + * + * @param[in,out] pack A valid non-zero tensor pack object if no failures occur + * @param[in] ctx Context to be used + * + * @return Status code + * + * Returns: + * - @ref AclSuccess if function was completed successfully + * - @ref AclOutOfMemory if there was a failure allocating memory resources + * - @ref AclInvalidArgument if a given argument is invalid + */ +AclStatus AclCreateTensorPack(AclTensorPack *pack, AclContext ctx); + +/** Add a tensor to a tensor pack + * + * @param[in,out] pack Pack to append a tensor to + * @param[in] tensor Tensor to pack + * @param[in] slot_id Slot of the operator that the tensors corresponds to + * + * @return Status code + * + * Returns: + * - @ref AclSuccess if function was completed successfully + * - @ref AclOutOfMemory if there was a failure allocating memory resources + * - @ref AclInvalidArgument if a given argument is invalid + */ +AclStatus AclPackTensor(AclTensorPack pack, AclTensor tensor, int32_t slot_id); + +/** A list of tensors to a tensor pack + * + * @param[in,out] pack Pack to append the tensors to + * @param[in] tensors Tensors to append to the pack + * @param[in] slot_ids Slot IDs of each tensors to the operators + * @param[in] num_tensors Number of tensors that are passed + * + * @return Status code + * + * Returns: + * - @ref AclSuccess if function was completed successfully + * - @ref AclOutOfMemory if there was a failure allocating memory resources + * - @ref AclInvalidArgument if a given argument is invalid + */ +AclStatus AclPackTensors(AclTensorPack pack, AclTensor *tensors, int32_t *slot_ids, size_t num_tensors); + +/** Destroy a given tensor pack object + * + * @param[in,out] pack A valid tensor pack object to destroy + * + * @return Status code + * + * Returns: + * - @ref AclSuccess if functions was completed successfully + * - @ref AclInvalidArgument if the provided context is invalid + */ +AclStatus AclDestroyTensorPack(AclTensorPack pack); + #ifdef __cplusplus } #endif /* __cplusplus */ diff --git a/arm_compute/AclOpenClExt.h b/arm_compute/AclOpenClExt.h index f71cd37299..15b233ca12 100644 --- a/arm_compute/AclOpenClExt.h +++ b/arm_compute/AclOpenClExt.h @@ -63,6 +63,15 @@ AclStatus AclGetClContext(AclContext ctx, cl_context *opencl_context); */ AclStatus AclSetClContext(AclContext ctx, cl_context opencl_context); +/** Extract the underlying OpenCL memory object by a given Compute Library tensor object + * + * @param[in] tensor A valid non-zero tensor + * @param[out] opencl_mem Underlyig OpenCL memory object + * + * @return Status code + */ +AclStatus AclGetClMem(AclTensor tensor, cl_mem *opencl_mem); + #ifdef __cplusplus } #endif /* __cplusplus */ diff --git a/arm_compute/AclTypes.h b/arm_compute/AclTypes.h index bee6d1a8d7..69717ec8a8 100644 --- a/arm_compute/AclTypes.h +++ b/arm_compute/AclTypes.h @@ -33,6 +33,10 @@ extern "C" { /**< Opaque Context object */ typedef struct AclContext_ *AclContext; +/**< Opaque Tensor object */ +typedef struct AclTensor_ *AclTensor; +/**< Opaque Tensor pack object */ +typedef struct AclTensorPack_ *AclTensorPack; // Capabilities bitfield (Note: if multiple are enabled ComputeLibrary will pick the best possible) typedef uint64_t AclTargetCapabilities; @@ -134,16 +138,55 @@ typedef struct AclContextOptions AclAllocator *allocator; /**< Allocator to be used by all the memory internally */ } AclContextOptions; -/** Default context */ -const AclContextOptions acl_default_ctx_options = +/**< Supported data types */ +typedef enum AclDataType { - AclPreferFastRerun, /* mode */ - AclCpuCapabilitiesAuto, /* capabilities */ - false, /* enable_fast_math */ - "default.mlgo", /* kernel_config_file */ - -1, /* max_compute_units */ - nullptr /* allocator */ -}; + AclDataTypeUnknown = 0, /**< Unknown data type */ + AclUInt8 = 1, /**< 8-bit unsigned integer */ + AclInt8 = 2, /**< 8-bit signed integer */ + AclUInt16 = 3, /**< 16-bit unsigned integer */ + AclInt16 = 4, /**< 16-bit signed integer */ + AclUint32 = 5, /**< 32-bit unsigned integer */ + AclInt32 = 6, /**< 32-bit signed integer */ + AclFloat16 = 7, /**< 16-bit floating point */ + AclBFloat16 = 8, /**< 16-bit brain floating point */ + AclFloat32 = 9, /**< 32-bit floating point */ +} AclDataType; + +/**< Supported data layouts for operations */ +typedef enum AclDataLayout +{ + AclDataLayoutUnknown = 0, /**< Unknown data layout */ + AclNhwc = 1, /**< Native, performant, Compute Library data layout */ + AclNchw = 2, /**< Data layout where width is the fastest changing dimension */ +} AclDataLayout; + +/** Type of memory to be imported */ +typedef enum AclImportMemoryType +{ + AclHostPtr = 0 /**< Host allocated memory */ +} AclImportMemoryType; + +/**< Tensor Descriptor */ +typedef struct AclTensorDescriptor +{ + int32_t ndims; /**< Number or dimensions */ + int32_t *shape; /**< Tensor Shape */ + AclDataType data_type; /**< Tensor Data type */ + int64_t *strides; /**< Strides on each dimension. Linear memory is assumed if nullptr */ + int64_t boffset; /**< Offset in terms of bytes for the first element */ +} AclTensorDescriptor; + +/**< Slot type of a tensor */ +typedef enum +{ + AclSlotUnknown = -1, + AclSrc = 0, + AclSrc0 = 0, + AclSrc1 = 1, + AclDst = 30, + AclSrcVec = 256, +} AclTensorSlot; #ifdef __cplusplus } diff --git a/arm_compute/core/utils/logging/Macros.h b/arm_compute/core/utils/logging/Macros.h index 21ed721eb1..1108dd3800 100644 --- a/arm_compute/core/utils/logging/Macros.h +++ b/arm_compute/core/utils/logging/Macros.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2021 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -40,6 +40,18 @@ } \ } while(false) +#define ARM_COMPUTE_LOG_MSG_WITH_FUNCNAME(logger_name, log_level, msg) \ + do \ + { \ + auto __logger = arm_compute::logging::LoggerRegistry::get().logger(logger_name); \ + if(__logger != nullptr) \ + { \ + std::ostringstream s; \ + s << __func__ << ":" << msg; \ + __logger->log(log_level, s.str()); \ + } \ + } while(false) + #define ARM_COMPUTE_LOG_MSG_WITH_FORMAT(logger_name, log_level, fmt, ...) \ do \ { \ @@ -68,6 +80,7 @@ #else /* ARM_COMPUTE_LOGGING_ENABLED */ #define ARM_COMPUTE_LOG_MSG(logger_name, log_level, msg) +#define ARM_COMPUTE_LOG_MSG_WITH_FUNCNAME(logger_name, log_level, msg) #define ARM_COMPUTE_LOG_MSG_WITH_FORMAT(logger_name, log_level, fmt, ...) #define ARM_COMPUTE_LOG_STREAM(logger_name, log_level, stream) -- cgit v1.2.1