aboutsummaryrefslogtreecommitdiff
path: root/tests/CL/Helper.h
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2021-04-22 21:13:21 +0100
committerGeorgios Pinitas <georgios.pinitas@arm.com>2021-05-18 14:48:39 +0000
commit856f66e6c61b77d03f754cd0fa8439891f0e4aca (patch)
treef9379cd0853ac407109e54c3d53b385ceee066c2 /tests/CL/Helper.h
parent37f4b2ef1ea225a90ccb563fcb2c08f8fb0fb5d5 (diff)
downloadComputeLibrary-856f66e6c61b77d03f754cd0fa8439891f0e4aca.tar.gz
Port CLGEMM to memory injecting interface
Moves the following kernels: - CLGEMMMatrixMultiplyKernel - CLGEMMMatrixMultiplyNativeKernel - CLGEMMMatrixMultiplyReshapedKernel - CLGEMMMatrixMultiplyReshapedOnlyRHSKernel Moves the following functions: - CLGEMM Introduces facilities to ease handling of auxiliary temporary buffers under the new run interface. Such are: - CLAuxTensorHandler: That allows wrapping of workspace buffers memory to CLBuffer objects - Ability to inject TensorInfo to allocator without transferring ownership. This reduces the copy overhead if needed. Resolves: COMPMID-4188 Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com> Change-Id: I7055435d831b05b749b26302082e4ac45f26dfb0 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5498 Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com> Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'tests/CL/Helper.h')
-rw-r--r--tests/CL/Helper.h83
1 files changed, 83 insertions, 0 deletions
diff --git a/tests/CL/Helper.h b/tests/CL/Helper.h
index 5153e98add..b99911e1e6 100644
--- a/tests/CL/Helper.h
+++ b/tests/CL/Helper.h
@@ -29,8 +29,11 @@
#include "arm_compute/runtime/CL/functions/CLFill.h"
#include "arm_compute/runtime/IFunction.h"
#include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/runtime/gpu/cl/IClOperator.h"
+#include "src/runtime/gpu/cl/operators/ClFill.h"
#include "src/core/CL/ICLKernel.h"
+#include "support/Cast.h"
#include <memory>
@@ -38,6 +41,86 @@ namespace arm_compute
{
namespace test
{
+/** This template synthetizes a simple IOperator which runs the given kernel K
+ *
+ * Each configure() overload creates a fresh K, configures it and stores it in
+ * _kernel, replacing whatever was stored there before.
+ * No run() is defined here, so execution is left to opencl::IClOperator.
+ * NOTE(review): _kernel is not declared in this class; presumably it is a member
+ * inherited from opencl::IClOperator - confirm against that header.
+ *
+ * @tparam K Kernel type. It must be default constructible and provide configure(),
+ *           set_target() and a static validate(), as used below.
+ */
+template <typename K>
+class CLSynthetizeOperator : public opencl::IClOperator
+{
+public:
+ /** Configure the kernel.
+ *
+ * The compile context returned by CLKernelLibrary::get().get_compile_context() is
+ * passed to K::configure() as the first argument, followed by @p args.
+ *
+ * @param[in] args Configuration arguments, perfectly forwarded to K::configure().
+ */
+ template <typename... Args>
+ void configure(Args &&... args)
+ {
+ auto k = std::make_unique<K>();
+ k->configure(CLKernelLibrary::get().get_compile_context(), std::forward<Args>(args)...);
+ _kernel = std::move(k);
+ }
+ /** Configure the kernel setting the GPU target as well
+ *
+ * The target is set on the new kernel before K::configure() is called.
+ *
+ * @param[in] gpu_target GPUTarget to set
+ * @param[in] args       Configuration arguments, perfectly forwarded to K::configure()
+ *                       after the compile context.
+ */
+ template <typename... Args>
+ void configure(GPUTarget gpu_target, Args &&... args)
+ {
+ auto k = std::make_unique<K>();
+ k->set_target(gpu_target);
+ k->configure(CLKernelLibrary::get().get_compile_context(), std::forward<Args>(args)...);
+ _kernel = std::move(k);
+ }
+ /** Validate input arguments
+ *
+ * Unlike configure(), no compile context is prepended: @p args are forwarded as they are.
+ *
+ * @param[in] args Configuration arguments, perfectly forwarded to K::validate().
+ *
+ * @return The Status returned by K::validate().
+ */
+ template <typename... Args>
+ static Status validate(Args &&... args)
+ {
+ return K::validate(std::forward<Args>(args)...);
+ }
+};
+
+/** Synthetizes an IClOperator which runs the given kernel K after filling the
+ * destination tensor and applying a constant border to the first argument.
+ *
+ * Both the fill and the border use a default-constructed PixelValue
+ * (zero, going by the class name - the value itself is not visible here).
+ *
+ * @tparam K          Kernel type to run.
+ * @tparam bordersize Value used to build the BorderSize given to the border handler.
+ */
+template <typename K, int bordersize>
+class CLSynthetizeOperatorInitOutputWithZeroAndWithZeroConstantBorder : public opencl::IClOperator
+{
+public:
+ /** Configure the kernel.
+ *
+ * Creates a fresh K, sets its target to CLScheduler::get().target() and configures it.
+ * It then configures the border handler on @p first (BorderMode::CONSTANT) and the
+ * fill on @p second. All three share the same compile context.
+ *
+ * @param[in] first  First input argument. Also the argument the constant border is configured on.
+ * @param[in] second Second input argument. Also the argument the fill is configured on.
+ *                   It has the same type T as @p first; both are taken by value.
+ * @param[in] args   Rest of the configuration arguments, forwarded to K::configure().
+ */
+ template <typename T, typename... Args>
+ void configure(T first, T second, Args &&... args)
+ {
+ auto cctx = CLKernelLibrary::get().get_compile_context();
+ auto k = std::make_unique<K>();
+ k->set_target(CLScheduler::get().target());
+ k->configure(cctx, first, second, std::forward<Args>(args)...);
+ _kernel = std::move(k);
+ _border_handler.configure(cctx, first, BorderSize(bordersize), BorderMode::CONSTANT, PixelValue());
+ _fill.configure(cctx, second, PixelValue());
+ }
+
+ /** Inherited method overridden: run the fill, then the border handler, then the kernel.
+ *
+ * The fill receives a separate pack in which the ACL_DST tensor of @p tensors is
+ * registered under ACL_SRC. The border handler and the kernel are enqueued through
+ * CLScheduler with @p tensors unchanged.
+ *
+ * @param[in] tensors Tensor pack; its ACL_DST entry is the tensor handed to the fill.
+ */
+ void run(ITensorPack &tensors) override final
+ {
+ ARM_COMPUTE_ERROR_ON_MSG(!_kernel, "The CL kernel or function isn't configured");
+
+ ITensorPack fill_pack = { { ACL_SRC, tensors.get_tensor(TensorType::ACL_DST) } };
+ _fill.run(fill_pack);
+ CLScheduler::get().enqueue_op(_border_handler, tensors);
+ CLScheduler::get().enqueue_op(*_kernel, tensors);
+ }
+
+private:
+ opencl::ClFill _fill{}; /**< Operator run first to fill the destination tensor */
+ CLFillBorderKernel _border_handler{}; /**< Kernel enqueued before the main kernel to handle borders */
+ std::unique_ptr<ICLKernel> _kernel{}; /**< Kernel to run, null until configure() is called */
+};
+
/** This template synthetizes an ICLSimpleFunction which runs the given kernel K */
template <typename K>
class CLSynthetizeFunction : public ICLSimpleFunction