From 856f66e6c61b77d03f754cd0fa8439891f0e4aca Mon Sep 17 00:00:00 2001
From: Georgios Pinitas
Date: Thu, 22 Apr 2021 21:13:21 +0100
Subject: Port CLGEMM to memory injecting interface

Moves the following kernels:
 - CLGEMMMatrixMultiplyKernel
 - CLGEMMMatrixMultiplyNativeKernel
 - CLGEMMMatrixMultiplyReshapedKernel
 - CLGEMMMatrixMultiplyReshapedOnlyRHSKernel

Moves the following functions:
 - CLGEMM

Introduces facilities to ease the handling of auxiliary temporary buffers
under the new run interface. These are:
 - CLAuxTensorHandler: allows wrapping workspace buffer memory in
   CLBuffer objects
 - Ability to inject TensorInfo to the allocator without transferring
   ownership. This reduces the copy overhead if needed.

Resolves: COMPMID-4188

Signed-off-by: Georgios Pinitas
Change-Id: I7055435d831b05b749b26302082e4ac45f26dfb0
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5498
Tested-by: Arm Jenkins
Reviewed-by: Michalis Spyrou
Comments-Addressed: Arm Jenkins
---
 tests/CL/Helper.h | 83 +++++++++++++++++++++++++++++++++++++++++++++++++++++++
 1 file changed, 83 insertions(+)

(limited to 'tests/CL/Helper.h')

diff --git a/tests/CL/Helper.h b/tests/CL/Helper.h
index 5153e98add..b99911e1e6 100644
--- a/tests/CL/Helper.h
+++ b/tests/CL/Helper.h
@@ -29,8 +29,11 @@
 #include "arm_compute/runtime/CL/functions/CLFill.h"
 #include "arm_compute/runtime/IFunction.h"
 #include "src/core/CL/kernels/CLFillBorderKernel.h"
+#include "src/runtime/gpu/cl/IClOperator.h"
+#include "src/runtime/gpu/cl/operators/ClFill.h"
 
 #include "src/core/CL/ICLKernel.h"
+#include "support/Cast.h"
 
 #include <memory>
 
@@ -38,6 +41,86 @@ namespace arm_compute
 {
 namespace test
 {
+/** This template synthetizes a simple IOperator which runs the given kernel K */
+template <typename K>
+class CLSynthetizeOperator : public opencl::IClOperator
+{
+public:
+    /** Configure the kernel.
+     *
+     * @param[in] args Configuration arguments, forwarded to K::configure after the compile context.
+     */
+    template <typename... Args>
+    void configure(Args &&... args)
+    {
+        auto k = std::make_unique<K>();
+        k->configure(CLKernelLibrary::get().get_compile_context(), std::forward<Args>(args)...);
+        _kernel = std::move(k);
+    }
+    /** Configure the kernel setting the GPU target as well
+     *
+     * @param[in] gpu_target GPUTarget to set on the kernel before it is configured
+     * @param[in] args       Configuration arguments, forwarded to K::configure after the compile context.
+     */
+    template <typename... Args>
+    void configure(GPUTarget gpu_target, Args &&... args)
+    {
+        auto k = std::make_unique<K>();
+        k->set_target(gpu_target);
+        k->configure(CLKernelLibrary::get().get_compile_context(), std::forward<Args>(args)...);
+        _kernel = std::move(k);
+    }
+    /** Validate input arguments
+     *
+     * @param[in] args Configuration arguments, forwarded unchanged to K::validate.
+     */
+    template <typename... Args>
+    static Status validate(Args &&... args)
+    {
+        return K::validate(std::forward<Args>(args)...);
+    }
+};
+
+/** As above but this also zero-fills the output tensor and adds a zero constant border to the first tensor argument */
+template <typename K, int bordersize>
+class CLSynthetizeOperatorInitOutputWithZeroAndWithZeroConstantBorder : public opencl::IClOperator
+{
+public:
+    /** Configure the kernel.
+     *
+     * @param[in] first  First kernel argument; also used to configure the constant border handler.
+     * @param[in] second Second kernel argument; also used to configure the fill operator.
+     * @param[in] args   Rest of the configuration arguments.
+     */
+    template <typename T, typename... Args>
+    void configure(T first, T second, Args &&... args)
+    {
+        auto cctx = CLKernelLibrary::get().get_compile_context();
+        auto k    = std::make_unique<K>();
+        k->set_target(CLScheduler::get().target());
+        k->configure(cctx, first, second, std::forward<Args>(args)...);
+        _kernel = std::move(k);
+        _border_handler.configure(cctx, first, BorderSize(bordersize), BorderMode::CONSTANT, PixelValue());
+        _fill.configure(cctx, second, PixelValue());
+    }
+
+    // Inherited method overridden: fills ACL_DST, then enqueues the border handler and the kernel in that order
+    void run(ITensorPack &tensors) override final
+    {
+        ARM_COMPUTE_ERROR_ON_MSG(!_kernel, "The CL kernel or function isn't configured");
+
+        ITensorPack fill_pack = { { ACL_SRC, tensors.get_tensor(TensorType::ACL_DST) } };
+        _fill.run(fill_pack);
+        CLScheduler::get().enqueue_op(_border_handler, tensors);
+        CLScheduler::get().enqueue_op(*_kernel, tensors);
+    }
+
+private:
+    opencl::ClFill             _fill{};           /**< Operator that fills the destination tensor */
+    CLFillBorderKernel         _border_handler{}; /**< Kernel to handle borders */
+    std::unique_ptr<ICLKernel> _kernel{};         /**< Kernel to run */
+};
+
 /** This template synthetizes an ICLSimpleFunction which runs the given kernel K */
 template <typename K>
 class CLSynthetizeFunction : public ICLSimpleFunction
-- 
cgit v1.2.1