aboutsummaryrefslogtreecommitdiff
path: root/arm_compute
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2021-03-18 10:59:40 +0000
committerGeorgios Pinitas <georgios.pinitas@arm.com>2021-04-28 17:05:40 +0000
commitc3c352e60050f3deacad767e429a88dc24b31af0 (patch)
treead30a0ba717a742caf5e4dcb9d89389cfdc134b0 /arm_compute
parente2535154fa34ac0290ec3daaa44545be0b2b4606 (diff)
downloadComputeLibrary-c3c352e60050f3deacad767e429a88dc24b31af0.tar.gz
Add Queue support
Queues are responsible for scheduling operators and performing other runtime related activities like for example tuning. Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com> Change-Id: I0366d9048470d277b8cbf59fa42f95c0ae57c5c9 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5487 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'arm_compute')
-rw-r--r--arm_compute/Acl.hpp104
-rw-r--r--arm_compute/AclEntrypoints.h43
-rw-r--r--arm_compute/AclOpenClExt.h38
-rw-r--r--arm_compute/AclTypes.h18
-rw-r--r--arm_compute/core/CL/OpenCL.h1
5 files changed, 194 insertions, 10 deletions
diff --git a/arm_compute/Acl.hpp b/arm_compute/Acl.hpp
index 01f7179c2f..93ac2d8ed9 100644
--- a/arm_compute/Acl.hpp
+++ b/arm_compute/Acl.hpp
@@ -42,6 +42,7 @@ namespace acl
{
// Forward declarations
class Context;
+class Queue;
class Tensor;
class TensorPack;
@@ -83,6 +84,7 @@ struct ObjectDeleter
};
OBJECT_DELETER(AclContext, AclDestroyContext)
+OBJECT_DELETER(AclQueue, AclDestroyQueue)
OBJECT_DELETER(AclTensor, AclDestroyTensor)
OBJECT_DELETER(AclTensorPack, AclDestroyTensorPack)
@@ -384,7 +386,7 @@ public:
AclContext ctx;
const auto st = detail::as_enum<StatusCode>(AclCreateContext(&ctx, detail::as_cenum<AclTarget>(target), &options.copts));
reset(ctx);
- report_status(st, "[Arm Compute Library] Failed to create context");
+ report_status(st, "[Compute Library] Failed to create context");
if(status)
{
*status = st;
@@ -392,6 +394,92 @@ public:
}
};
+/**< Available tuning modes */
+enum class TuningMode
+{
+ Rapid = AclRapid,
+ Normal = AclNormal,
+ Exhaustive = AclExhaustive
+};
+
+/** Queue class
+ *
+ * Queue is responsible for the execution related aspects, with main responsibilities those of
+ * scheduling and tuning operators.
+ *
+ * Multiple queues can be created from the same context, and the same operator can be scheduled on each concurrently.
+ *
+ * @note An operator might depend on the maximum possible compute units that are provided in the context,
+ * thus in cases where the number of the scheduling units of the queue are greater might lead to errors.
+ */
+class Queue : public detail::ObjectBase<AclQueue_>
+{
+public:
+ /**< Queue options */
+ struct Options
+ {
+ /** Default Constructor
+ *
+ * As default options, no tuning will be performed, and the number of scheduling units will
+ * depends on internal device discovery functionality
+ */
+ Options()
+ : opts{ AclTuningModeNone, 0 } {};
+ /** Constructor
+ *
+ * @param[in] mode Tuning mode to be used
+ * @param[in] compute_units Number of scheduling units to be used
+ */
+ Options(TuningMode mode, int32_t compute_units)
+ : opts{ detail::as_cenum<AclTuningMode>(mode), compute_units }
+ {
+ }
+
+ AclQueueOptions opts;
+ };
+
+public:
+ /** Constructor
+ *
+ * @note Serves as a simpler delegate constructor
+ * @note As queue options, default conservative options will be used
+ *
+ * @param[in] ctx Context to create queue for
+ * @param[out] status Status information if requested
+ */
+ explicit Queue(Context &ctx, StatusCode *status = nullptr)
+ : Queue(ctx, Options(), status)
+ {
+ }
+ /** Constructor
+ *
+ * @note As queue options, default conservative options will be used
+ *
+ * @param[in] ctx Context from where the queue will be created from
+ * @param[in] options Queue options to be used
+ * @param[out] status Status information if requested
+ */
+ explicit Queue(Context &ctx, const Options &options = Options(), StatusCode *status = nullptr)
+ {
+ AclQueue queue;
+ const auto st = detail::as_enum<StatusCode>(AclCreateQueue(&queue, ctx.get(), &options.opts));
+ reset(queue);
+ report_status(st, "[Compute Library] Failed to create queue!");
+ if(status)
+ {
+ *status = st;
+ }
+ }
+ /** Block until all the tasks of the queue have been marked as finished
+ *
+ * @return Status code
+ */
+ StatusCode finish()
+ {
+ return detail::as_enum<StatusCode>(AclQueueFinish(_object.get()));
+ }
+};
+
/**< Data type enumeration */
enum class DataType
{
@@ -519,7 +607,7 @@ public:
AclTensor tensor;
const auto st = detail::as_enum<StatusCode>(AclCreateTensor(&tensor, ctx.get(), desc.get(), allocate));
reset(tensor);
- report_status(st, "[Arm Compute Library] Failed to create tensor!");
+ report_status(st, "[Compute Library] Failed to create tensor!");
if(status)
{
*status = st;
@@ -533,7 +621,7 @@ public:
{
void *handle = nullptr;
const auto st = detail::as_enum<StatusCode>(AclMapTensor(_object.get(), &handle));
- report_status(st, "[Arm Compute Library] Failed to map the tensor and extract the tensor's backing memory!");
+ report_status(st, "[Compute Library] Failed to map the tensor and extract the tensor's backing memory!");
return handle;
}
/** Unmaps tensor's memory
@@ -545,7 +633,7 @@ public:
StatusCode unmap(void *handle)
{
const auto st = detail::as_enum<StatusCode>(AclUnmapTensor(_object.get(), handle));
- report_status(st, "[Arm Compute Library] Failed to unmap the tensor!");
+ report_status(st, "[Compute Library] Failed to unmap the tensor!");
return st;
}
/** Import external memory to a given tensor object
@@ -558,7 +646,7 @@ public:
StatusCode import(void *handle, ImportType type)
{
const auto st = detail::as_enum<StatusCode>(AclTensorImport(_object.get(), handle, detail::as_cenum<AclImportMemoryType>(type)));
- report_status(st, "[Arm Compute Library] Failed to import external memory to tensor!");
+ report_status(st, "[Compute Library] Failed to import external memory to tensor!");
return st;
}
/** Get the size of the tensor in byte
@@ -571,7 +659,7 @@ public:
{
uint64_t size{ 0 };
const auto st = detail::as_enum<StatusCode>(AclGetTensorSize(_object.get(), &size));
- report_status(st, "[Arm Compute Library] Failed to get the size of the tensor");
+ report_status(st, "[Compute Library] Failed to get the size of the tensor");
return size;
}
/** Get the descriptor of this tensor
@@ -582,7 +670,7 @@ public:
{
AclTensorDescriptor desc;
const auto st = detail::as_enum<StatusCode>(AclGetTensorDescriptor(_object.get(), &desc));
- report_status(st, "[Arm Compute Library] Failed to get the descriptor of the tensor");
+ report_status(st, "[Compute Library] Failed to get the descriptor of the tensor");
return TensorDescriptor(desc);
}
};
@@ -623,7 +711,7 @@ public:
AclTensorPack pack;
const auto st = detail::as_enum<StatusCode>(AclCreateTensorPack(&pack, ctx.get()));
reset(pack);
- report_status(st, "[Arm Compute Library] Failure during tensor pack creation");
+ report_status(st, "[Compute Library] Failure during tensor pack creation");
if(status)
{
*status = st;
diff --git a/arm_compute/AclEntrypoints.h b/arm_compute/AclEntrypoints.h
index cd974341c2..cf4a237a44 100644
--- a/arm_compute/AclEntrypoints.h
+++ b/arm_compute/AclEntrypoints.h
@@ -62,6 +62,49 @@ AclStatus AclCreateContext(AclContext *ctx,
*/
AclStatus AclDestroyContext(AclContext ctx);
+/** Create an operator queue
+ *
+ * Queue is responsible for any scheduling related activities
+ *
+ * @param[in, out] queue A valid non-zero queue object is not failures occur
+ * @param[in] ctx Context to be used
+ * @param[in] options Queue options to be used for the operators using the queue
+ *
+ * @return Status code
+ *
+ * Returns:
+ * - @ref AclSuccess if function was completed successfully
+ * - @ref AclOutOfMemory if there was a failure allocating memory resources
+ * - @ref AclUnsupportedTarget if the requested target is unsupported
+ * - @ref AclInvalidArgument if a given argument is invalid
+ */
+AclStatus AclCreateQueue(AclQueue *queue, AclContext ctx, const AclQueueOptions *options);
+
+/** Wait until all elements on the queue have been completed
+ *
+ * @param[in] queue Queue to wait on completion
+ *
+ * @return Status code
+ *
+ * Returns:
+ * - @ref AclSuccess if functions was completed successfully
+ * - @ref AclInvalidArgument if the provided queue is invalid
+ * - @ref AclRuntimeError on any other runtime related error
+ */
+AclStatus AclQueueFinish(AclQueue queue);
+
+/** Destroy a given queue object
+ *
+ * @param[in] queue A valid context object to destroy
+ *
+ * @return Status code
+ *
+ * Returns:
+ * - @ref AclSuccess if functions was completed successfully
+ * - @ref AclInvalidArgument if the provided context is invalid
+ */
+AclStatus AclDestroyQueue(AclQueue queue);
+
/** Create a Tensor object
*
* Tensor is a generalized matrix construct that can represent up to ND dimensionality (where N = 6 for Compute Library)
diff --git a/arm_compute/AclOpenClExt.h b/arm_compute/AclOpenClExt.h
index 15b233ca12..b9080dabf2 100644
--- a/arm_compute/AclOpenClExt.h
+++ b/arm_compute/AclOpenClExt.h
@@ -43,7 +43,6 @@ extern "C" {
/** Extract the underlying OpenCL context used by a given Compute Library context object
*
* @note @ref AclContext should be of an OpenCL backend target
- * @note @ref AclContext refcount should be 0, meaning not used by other objects
*
* @param[in] ctx A valid non-zero context
* @param[out] opencl_context Underlying OpenCL context used
@@ -52,7 +51,18 @@ extern "C" {
*/
AclStatus AclGetClContext(AclContext ctx, cl_context *opencl_context);
-/** Set the underlying OpenCL context used by a given Compute Library context object
+/** Extract the underlying OpenCL device id used by a given Compute Library context object
+ *
+ * @note @ref AclContext should be of an OpenCL backend target
+ *
+ * @param[in] ctx A valid non-zero context
+ * @param[out] opencl_device Underlying OpenCL device used
+ *
+ * @return Status code
+ */
+AclStatus AclGetClDevice(AclContext ctx, cl_device_id *opencl_device);
+
+/** Set the underlying OpenCL context to be used by a given Compute Library context object
*
* @note @ref AclContext should be of an OpenCL backend target
*
@@ -63,6 +73,30 @@ AclStatus AclGetClContext(AclContext ctx, cl_context *opencl_context);
*/
AclStatus AclSetClContext(AclContext ctx, cl_context opencl_context);
+/** Extract the underlying OpenCL queue used by a given Compute Library queue object
+ *
+ * @note @ref AclQueue should be of an OpenCL backend target
+ * @note @ref AclQueue refcount should be 0, meaning not used by other objects
+ *
+ * @param[in] queue A valid non-zero queue
+ * @param[out] opencl_queue Underlying OpenCL queue used
+ *
+ * @return Status code
+ */
+AclStatus AclGetClQueue(AclQueue queue, cl_command_queue *opencl_queue);
+
+/** Set the underlying OpenCL queue to be used by a given Compute Library queue object
+ *
+ * @note @ref AclQueue should be of an OpenCL backend target
+ * @note opecl_queue needs to be created from the same context that the AclContext that the queue will use
+ *
+ * @param[in] queue A valid non-zero queue object
+ * @param[out] opencl_queue Underlying OpenCL queue to be used
+ *
+ * @return Status code
+ */
+AclStatus AclSetClQueue(AclQueue queue, cl_command_queue opencl_queue);
+
/** Extract the underlying OpenCL memory object by a given Compute Library tensor object
*
* @param[in] tensor A valid non-zero tensor
diff --git a/arm_compute/AclTypes.h b/arm_compute/AclTypes.h
index 69717ec8a8..902a508b91 100644
--- a/arm_compute/AclTypes.h
+++ b/arm_compute/AclTypes.h
@@ -33,6 +33,8 @@ extern "C" {
/**< Opaque Context object */
typedef struct AclContext_ *AclContext;
+/**< Opaque Queue object */
+typedef struct AclQueue_ *AclQueue;
/**< Opaque Tensor object */
typedef struct AclTensor_ *AclTensor;
/**< Opaque Tensor pack object */
@@ -138,6 +140,22 @@ typedef struct AclContextOptions
AclAllocator *allocator; /**< Allocator to be used by all the memory internally */
} AclContextOptions;
+/**< Supported tuning modes */
+typedef enum
+{
+ AclTuningModeNone = 0, /**< No tuning */
+ AclRapid = 1, /**< Fast tuning mode, testing a small portion of the tuning space */
+ AclNormal = 2, /**< Normal tuning mode, gives a good balance between tuning mode and performance */
+ AclExhaustive = 3, /**< Exhaustive tuning mode, increased tuning time but with best results */
+} AclTuningMode;
+
+/**< Queue options */
+typedef struct
+{
+ AclTuningMode mode; /**< Tuning mode */
+ int32_t compute_units; /**< Compute Units that the queue will deploy */
+} AclQueueOptions;
+
/**< Supported data types */
typedef enum AclDataType
{
diff --git a/arm_compute/core/CL/OpenCL.h b/arm_compute/core/CL/OpenCL.h
index 1e6b04c042..bbe469f1a8 100644
--- a/arm_compute/core/CL/OpenCL.h
+++ b/arm_compute/core/CL/OpenCL.h
@@ -92,6 +92,7 @@ public:
DECLARE_FUNCTION_PTR(clCreateContext);
DECLARE_FUNCTION_PTR(clCreateContextFromType);
DECLARE_FUNCTION_PTR(clCreateCommandQueue);
+ DECLARE_FUNCTION_PTR(clCreateCommandQueueWithProperties);
DECLARE_FUNCTION_PTR(clGetContextInfo);
DECLARE_FUNCTION_PTR(clBuildProgram);
DECLARE_FUNCTION_PTR(clEnqueueNDRangeKernel);