From c3c352e60050f3deacad767e429a88dc24b31af0 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Thu, 18 Mar 2021 10:59:40 +0000 Subject: Add Queue support Queues are responsible for scheduling operators and performing other runtime related activities like for example tuning. Signed-off-by: Georgios Pinitas Change-Id: I0366d9048470d277b8cbf59fa42f95c0ae57c5c9 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5487 Tested-by: Arm Jenkins Reviewed-by: Michele Di Giorgio Reviewed-by: Michalis Spyrou Comments-Addressed: Arm Jenkins --- arm_compute/Acl.hpp | 104 +++++++++++++++++++++++++++++++++++++++---- arm_compute/AclEntrypoints.h | 43 ++++++++++++++++++ arm_compute/AclOpenClExt.h | 38 +++++++++++++++- arm_compute/AclTypes.h | 18 ++++++++ arm_compute/core/CL/OpenCL.h | 1 + 5 files changed, 194 insertions(+), 10 deletions(-) (limited to 'arm_compute') diff --git a/arm_compute/Acl.hpp b/arm_compute/Acl.hpp index 01f7179c2f..93ac2d8ed9 100644 --- a/arm_compute/Acl.hpp +++ b/arm_compute/Acl.hpp @@ -42,6 +42,7 @@ namespace acl { // Forward declarations class Context; +class Queue; class Tensor; class TensorPack; @@ -83,6 +84,7 @@ struct ObjectDeleter }; OBJECT_DELETER(AclContext, AclDestroyContext) +OBJECT_DELETER(AclQueue, AclDestroyQueue) OBJECT_DELETER(AclTensor, AclDestroyTensor) OBJECT_DELETER(AclTensorPack, AclDestroyTensorPack) @@ -384,7 +386,7 @@ public: AclContext ctx; const auto st = detail::as_enum(AclCreateContext(&ctx, detail::as_cenum(target), &options.copts)); reset(ctx); - report_status(st, "[Arm Compute Library] Failed to create context"); + report_status(st, "[Compute Library] Failed to create context"); if(status) { *status = st; @@ -392,6 +394,92 @@ public: } }; +/**< Available tuning modes */ +enum class TuningMode +{ + Rapid = AclRapid, + Normal = AclNormal, + Exhaustive = AclExhaustive +}; + +/** Queue class + * + * Queue is responsible for the execution related aspects, with main responsibilities those of + * scheduling and tuning operators. + * + * Multiple queues can be created from the same context, and the same operator can be scheduled on each concurrently. + * + * @note An operator might depend on the maximum possible compute units that are provided in the context, + * thus in cases where the number of the scheduling units of the queue are greater might lead to errors. + */ +class Queue : public detail::ObjectBase +{ +public: + /**< Queue options */ + struct Options + { + /** Default Constructor + * + * As default options, no tuning will be performed, and the number of scheduling units will + * depends on internal device discovery functionality + */ + Options() + : opts{ AclTuningModeNone, 0 } {}; + /** Constructor + * + * @param[in] mode Tuning mode to be used + * @param[in] compute_units Number of scheduling units to be used + */ + Options(TuningMode mode, int32_t compute_units) + : opts{ detail::as_cenum(mode), compute_units } + { + } + + AclQueueOptions opts; + }; + +public: + /** Constructor + * + * @note Serves as a simpler delegate constructor + * @note As queue options, default conservative options will be used + * + * @param[in] ctx Context to create queue for + * @param[out] status Status information if requested + */ + explicit Queue(Context &ctx, StatusCode *status = nullptr) + : Queue(ctx, Options(), status) + { + } + /** Constructor + * + * @note As queue options, default conservative options will be used + * + * @param[in] ctx Context from where the queue will be created from + * @param[in] options Queue options to be used + * @param[out] status Status information if requested + */ + explicit Queue(Context &ctx, const Options &options = Options(), StatusCode *status = nullptr) + { + AclQueue queue; + const auto st = detail::as_enum(AclCreateQueue(&queue, ctx.get(), &options.opts)); + reset(queue); + report_status(st, "[Compute Library] Failed to create queue!"); + if(status) + { + *status = st; + } + } + /** Block until all the tasks of the queue have been marked as finished + * + * @return Status code + */ + StatusCode finish() + { + return detail::as_enum(AclQueueFinish(_object.get())); + } +}; + /**< Data type enumeration */ enum class DataType { @@ -519,7 +607,7 @@ public: AclTensor tensor; const auto st = detail::as_enum(AclCreateTensor(&tensor, ctx.get(), desc.get(), allocate)); reset(tensor); - report_status(st, "[Arm Compute Library] Failed to create tensor!"); + report_status(st, "[Compute Library] Failed to create tensor!"); if(status) { *status = st; @@ -533,7 +621,7 @@ public: { void *handle = nullptr; const auto st = detail::as_enum(AclMapTensor(_object.get(), &handle)); - report_status(st, "[Arm Compute Library] Failed to map the tensor and extract the tensor's backing memory!"); + report_status(st, "[Compute Library] Failed to map the tensor and extract the tensor's backing memory!"); return handle; } /** Unmaps tensor's memory @@ -545,7 +633,7 @@ public: StatusCode unmap(void *handle) { const auto st = detail::as_enum(AclUnmapTensor(_object.get(), handle)); - report_status(st, "[Arm Compute Library] Failed to unmap the tensor!"); + report_status(st, "[Compute Library] Failed to unmap the tensor!"); return st; } /** Import external memory to a given tensor object @@ -558,7 +646,7 @@ public: StatusCode import(void *handle, ImportType type) { const auto st = detail::as_enum(AclTensorImport(_object.get(), handle, detail::as_cenum(type))); - report_status(st, "[Arm Compute Library] Failed to import external memory to tensor!"); + report_status(st, "[Compute Library] Failed to import external memory to tensor!"); return st; } /** Get the size of the tensor in byte @@ -571,7 +659,7 @@ public: { uint64_t size{ 0 }; const auto st = detail::as_enum(AclGetTensorSize(_object.get(), &size)); - report_status(st, "[Arm Compute Library] Failed to get the size of the tensor"); + report_status(st, "[Compute Library] Failed to get the size of the tensor"); return size; } /** Get the descriptor of this tensor @@ -582,7 +670,7 @@ public: { AclTensorDescriptor desc; const auto st = detail::as_enum(AclGetTensorDescriptor(_object.get(), &desc)); - report_status(st, "[Arm Compute Library] Failed to get the descriptor of the tensor"); + report_status(st, "[Compute Library] Failed to get the descriptor of the tensor"); return TensorDescriptor(desc); } }; @@ -623,7 +711,7 @@ public: AclTensorPack pack; const auto st = detail::as_enum(AclCreateTensorPack(&pack, ctx.get())); reset(pack); - report_status(st, "[Arm Compute Library] Failure during tensor pack creation"); + report_status(st, "[Compute Library] Failure during tensor pack creation"); if(status) { *status = st; diff --git a/arm_compute/AclEntrypoints.h b/arm_compute/AclEntrypoints.h index cd974341c2..cf4a237a44 100644 --- a/arm_compute/AclEntrypoints.h +++ b/arm_compute/AclEntrypoints.h @@ -62,6 +62,49 @@ AclStatus AclCreateContext(AclContext *ctx, */ AclStatus AclDestroyContext(AclContext ctx); +/** Create an operator queue + * + * Queue is responsible for any scheduling related activities + * + * @param[in, out] queue A valid non-zero queue object is not failures occur + * @param[in] ctx Context to be used + * @param[in] options Queue options to be used for the operators using the queue + * + * @return Status code + * + * Returns: + * - @ref AclSuccess if function was completed successfully + * - @ref AclOutOfMemory if there was a failure allocating memory resources + * - @ref AclUnsupportedTarget if the requested target is unsupported + * - @ref AclInvalidArgument if a given argument is invalid + */ +AclStatus AclCreateQueue(AclQueue *queue, AclContext ctx, const AclQueueOptions *options); + +/** Wait until all elements on the queue have been completed + * + * @param[in] queue Queue to wait on completion + * + * @return Status code + * + * Returns: + * - @ref AclSuccess if functions was completed successfully + * - @ref AclInvalidArgument if the provided queue is invalid + * - @ref AclRuntimeError on any other runtime related error + */ +AclStatus AclQueueFinish(AclQueue queue); + +/** Destroy a given queue object + * + * @param[in] queue A valid context object to destroy + * + * @return Status code + * + * Returns: + * - @ref AclSuccess if functions was completed successfully + * - @ref AclInvalidArgument if the provided context is invalid + */ +AclStatus AclDestroyQueue(AclQueue queue); + /** Create a Tensor object * * Tensor is a generalized matrix construct that can represent up to ND dimensionality (where N = 6 for Compute Library) diff --git a/arm_compute/AclOpenClExt.h b/arm_compute/AclOpenClExt.h index 15b233ca12..b9080dabf2 100644 --- a/arm_compute/AclOpenClExt.h +++ b/arm_compute/AclOpenClExt.h @@ -43,7 +43,6 @@ extern "C" { /** Extract the underlying OpenCL context used by a given Compute Library context object * * @note @ref AclContext should be of an OpenCL backend target - * @note @ref AclContext refcount should be 0, meaning not used by other objects * * @param[in] ctx A valid non-zero context * @param[out] opencl_context Underlying OpenCL context used @@ -52,7 +51,18 @@ extern "C" { */ AclStatus AclGetClContext(AclContext ctx, cl_context *opencl_context); -/** Set the underlying OpenCL context used by a given Compute Library context object +/** Extract the underlying OpenCL device id used by a given Compute Library context object + * + * @note @ref AclContext should be of an OpenCL backend target + * + * @param[in] ctx A valid non-zero context + * @param[out] opencl_device Underlying OpenCL device used + * + * @return Status code + */ +AclStatus AclGetClDevice(AclContext ctx, cl_device_id *opencl_device); + +/** Set the underlying OpenCL context to be used by a given Compute Library context object * * @note @ref AclContext should be of an OpenCL backend target * @@ -63,6 +73,30 @@ AclStatus AclGetClContext(AclContext ctx, cl_context *opencl_context); */ AclStatus AclSetClContext(AclContext ctx, cl_context opencl_context); +/** Extract the underlying OpenCL queue used by a given Compute Library queue object + * + * @note @ref AclQueue should be of an OpenCL backend target + * @note @ref AclQueue refcount should be 0, meaning not used by other objects + * + * @param[in] queue A valid non-zero queue + * @param[out] opencl_queue Underlying OpenCL queue used + * + * @return Status code + */ +AclStatus AclGetClQueue(AclQueue queue, cl_command_queue *opencl_queue); + +/** Set the underlying OpenCL queue to be used by a given Compute Library queue object + * + * @note @ref AclQueue should be of an OpenCL backend target + * @note opecl_queue needs to be created from the same context that the AclContext that the queue will use + * + * @param[in] queue A valid non-zero queue object + * @param[out] opencl_queue Underlying OpenCL queue to be used + * + * @return Status code + */ +AclStatus AclSetClQueue(AclQueue queue, cl_command_queue opencl_queue); + /** Extract the underlying OpenCL memory object by a given Compute Library tensor object * * @param[in] tensor A valid non-zero tensor diff --git a/arm_compute/AclTypes.h b/arm_compute/AclTypes.h index 69717ec8a8..902a508b91 100644 --- a/arm_compute/AclTypes.h +++ b/arm_compute/AclTypes.h @@ -33,6 +33,8 @@ extern "C" { /**< Opaque Context object */ typedef struct AclContext_ *AclContext; +/**< Opaque Queue object */ +typedef struct AclQueue_ *AclQueue; /**< Opaque Tensor object */ typedef struct AclTensor_ *AclTensor; /**< Opaque Tensor pack object */ @@ -138,6 +140,22 @@ typedef struct AclContextOptions AclAllocator *allocator; /**< Allocator to be used by all the memory internally */ } AclContextOptions; +/**< Supported tuning modes */ +typedef enum +{ + AclTuningModeNone = 0, /**< No tuning */ + AclRapid = 1, /**< Fast tuning mode, testing a small portion of the tuning space */ + AclNormal = 2, /**< Normal tuning mode, gives a good balance between tuning mode and performance */ + AclExhaustive = 3, /**< Exhaustive tuning mode, increased tuning time but with best results */ +} AclTuningMode; + +/**< Queue options */ +typedef struct +{ + AclTuningMode mode; /**< Tuning mode */ + int32_t compute_units; /**< Compute Units that the queue will deploy */ +} AclQueueOptions; + /**< Supported data types */ typedef enum AclDataType { diff --git a/arm_compute/core/CL/OpenCL.h b/arm_compute/core/CL/OpenCL.h index 1e6b04c042..bbe469f1a8 100644 --- a/arm_compute/core/CL/OpenCL.h +++ b/arm_compute/core/CL/OpenCL.h @@ -92,6 +92,7 @@ public: DECLARE_FUNCTION_PTR(clCreateContext); DECLARE_FUNCTION_PTR(clCreateContextFromType); DECLARE_FUNCTION_PTR(clCreateCommandQueue); + DECLARE_FUNCTION_PTR(clCreateCommandQueueWithProperties); DECLARE_FUNCTION_PTR(clGetContextInfo); DECLARE_FUNCTION_PTR(clBuildProgram); DECLARE_FUNCTION_PTR(clEnqueueNDRangeKernel); -- cgit v1.2.1