diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2021-04-22 21:13:21 +0100 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2021-05-18 14:48:39 +0000 |
commit | 856f66e6c61b77d03f754cd0fa8439891f0e4aca (patch) | |
tree | f9379cd0853ac407109e54c3d53b385ceee066c2 /tests/CL/Helper.h | |
parent | 37f4b2ef1ea225a90ccb563fcb2c08f8fb0fb5d5 (diff) | |
download | ComputeLibrary-856f66e6c61b77d03f754cd0fa8439891f0e4aca.tar.gz |
Port CLGEMM to memory injecting interface
Moves the following kernels:
- CLGEMMMatrixMultiplyKernel
- CLGEMMMatrixMultiplyNativeKernel
- CLGEMMMatrixMultiplyReshapedKernel
- CLGEMMMatrixMultiplyReshapedOnlyRHSKernel
Moves the following functions:
- CLGEMM
Introduces facilities to ease the handling of auxiliary temporary buffers
under the new run interface. These are:
- CLAuxTensorHandler: allows wrapping workspace buffer memory
into CLBuffer objects
- Ability to inject TensorInfo to allocator without transferring
ownership. This reduces the copy overhead if needed.
Resolves: COMPMID-4188
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Change-Id: I7055435d831b05b749b26302082e4ac45f26dfb0
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5498
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Michalis Spyrou <michalis.spyrou@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'tests/CL/Helper.h')
-rw-r--r-- | tests/CL/Helper.h | 83 |
1 files changed, 83 insertions, 0 deletions
diff --git a/tests/CL/Helper.h b/tests/CL/Helper.h index 5153e98add..b99911e1e6 100644 --- a/tests/CL/Helper.h +++ b/tests/CL/Helper.h @@ -29,8 +29,11 @@ #include "arm_compute/runtime/CL/functions/CLFill.h" #include "arm_compute/runtime/IFunction.h" #include "src/core/CL/kernels/CLFillBorderKernel.h" +#include "src/runtime/gpu/cl/IClOperator.h" +#include "src/runtime/gpu/cl/operators/ClFill.h" #include "src/core/CL/ICLKernel.h" +#include "support/Cast.h" #include <memory> @@ -38,6 +41,86 @@ namespace arm_compute { namespace test { +/** This template synthetizes a simple opencl::IClOperator around the given kernel K: configure() creates a K, configures it and stores it in _kernel */ +template <typename K> +class CLSynthetizeOperator : public opencl::IClOperator +{ +public: + /** Create a kernel of type K, configure it and store it in _kernel. + * + * @param[in] args Configuration arguments, forwarded to K::configure() after the compile context. + */ + template <typename... Args> + void configure(Args &&... args) + { + auto k = std::make_unique<K>(); + k->configure(CLKernelLibrary::get().get_compile_context(), std::forward<Args>(args)...); + _kernel = std::move(k); + } + /** Configure the kernel, setting the GPU target on it before K::configure() is called + * + * @param[in] gpu_target GPUTarget to set on the kernel + * @param[in] args Configuration arguments, forwarded to K::configure() after the compile context. + */ + template <typename... Args> + void configure(GPUTarget gpu_target, Args &&... args) + { + auto k = std::make_unique<K>(); + k->set_target(gpu_target); + k->configure(CLKernelLibrary::get().get_compile_context(), std::forward<Args>(args)...); + _kernel = std::move(k); + } + /** Validate input arguments + * + * @param[in] args Configuration arguments, forwarded to K::validate(). + * + * @return The Status returned by K::validate(). + */ + template <typename... Args> + static Status validate(Args &&... args) + { + return K::validate(std::forward<Args>(args)...); + } +}; + +/** As above, but run() also fills the ACL_DST tensor and enqueues a constant border fill of size bordersize, both configured with a default-constructed PixelValue, before enqueuing the kernel */ +template <typename K, int bordersize> +class CLSynthetizeOperatorInitOutputWithZeroAndWithZeroConstantBorder : public opencl::IClOperator +{ +public: + /** Configure the kernel, the border handler and the fill operator. + * + * @param[in] first First argument; also passed to the border handler's configure(). 
+ * @param[in] second Second argument; also passed to the fill operator's configure(). + * @param[in] args Rest of the configuration arguments. + */ + template <typename T, typename... Args> + void configure(T first, T second, Args &&... args) + { + auto cctx = CLKernelLibrary::get().get_compile_context(); /* NOTE(review): plain auto takes a copy if get_compile_context() returns a reference - confirm this is intended */ + auto k = std::make_unique<K>(); + k->set_target(CLScheduler::get().target()); + k->configure(cctx, first, second, std::forward<Args>(args)...); + _kernel = std::move(k); + _border_handler.configure(cctx, first, BorderSize(bordersize), BorderMode::CONSTANT, PixelValue()); + _fill.configure(cctx, second, PixelValue()); + } + + // Inherited method overridden: + void run(ITensorPack &tensors) override final + { + ARM_COMPUTE_ERROR_ON_MSG(!_kernel, "The CL kernel or function isn't configured"); + + ITensorPack fill_pack = { { ACL_SRC, tensors.get_tensor(TensorType::ACL_DST) } }; + _fill.run(fill_pack); + CLScheduler::get().enqueue_op(_border_handler, tensors); + CLScheduler::get().enqueue_op(*_kernel, tensors); + } + +private: + opencl::ClFill _fill{}; /**< Operator used by run() to fill the ACL_DST tensor */ + CLFillBorderKernel _border_handler{}; /**< Kernel to handle borders */ + std::unique_ptr<ICLKernel> _kernel{}; /**< Kernel to run. NOTE(review): CLSynthetizeOperator assigns a _kernel it does not declare, so the base presumably has one that this member hides - confirm */ +}; + /** This template synthetizes an ICLSimpleFunction which runs the given kernel K */ template <typename K> class CLSynthetizeFunction : public ICLSimpleFunction |