From 856f66e6c61b77d03f754cd0fa8439891f0e4aca Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Thu, 22 Apr 2021 21:13:21 +0100 Subject: Port CLGEMM to memory injecting interface Moves the following kernels: - CLGEMMMatrixMultiplyKernel - CLGEMMMatrixMultiplyNativeKernel - CLGEMMMatrixMultipluReshapedKernel - CLGEMMMatrixMultiplyReshapedOnlyRHSKernel Moves the following functions - CLGEMM Introduces facilities to easy handling of auxiliary temporary buffers under then new run interface. Such are: - CLAuxTensorHandler: That allows wrapping of workspace buffers memory to CLBuffer objects - Ability to inject TensorInfo to allocator without transferring ownership. This reduce the copy overhead if needed. Resolves: COMPMID-4188 Signed-off-by: Georgios Pinitas Change-Id: I7055435d831b05b749b26302082e4ac45f26dfb0 Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5498 Tested-by: Arm Jenkins Reviewed-by: Michalis Spyrou Comments-Addressed: Arm Jenkins --- src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h | 122 ----------------------- 1 file changed, 122 deletions(-) delete mode 100644 src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h (limited to 'src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h') diff --git a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h b/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h deleted file mode 100644 index 71d223b8ac..0000000000 --- a/src/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h +++ /dev/null @@ -1,122 +0,0 @@ -/* - * Copyright (c) 2017-2020 Arm Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H -#define ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H - -#include "src/core/CL/ICLKernel.h" - -namespace arm_compute -{ -class ICLTensor; - -/** OpenCL kernel to multiply two input matrices "A" and "B" and add a martix "C" if provided. All elements of the output matrix will be multiplied by alpha. In case matrix C is passed, it will be added to the previous result. - * For the matrix C, the broadcast addition is supported if the flag "broadcast_bias" is set in the GEMMReshapeInfo object - * - * @note If the input tensors @p input0 and @p input1 have been reshaped respectively with @ref CLGEMMReshapeLHSMatrixKernel" and @ref CLGEMMReshapeRHSMatrixKernel, - * the flag @p is_interleaved_transposed must be set to true - * - * @attention @p input1 tensor must have at least 2 dimensions (matrix) - * - */ -class CLGEMMMatrixMultiplyKernel : public ICLKernel -{ -public: - /** Default constructor */ - CLGEMMMatrixMultiplyKernel(); - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMMatrixMultiplyKernel(const CLGEMMMatrixMultiplyKernel &) = delete; - /** Prevent instances of this class from being copied (As this class contains pointers) */ - CLGEMMMatrixMultiplyKernel &operator=(const CLGEMMMatrixMultiplyKernel &) = delete; - /** Allow instances of this class to be moved */ - CLGEMMMatrixMultiplyKernel(CLGEMMMatrixMultiplyKernel &&) = default; - /** Allow instances of this class to be moved */ - CLGEMMMatrixMultiplyKernel &operator=(CLGEMMMatrixMultiplyKernel &&) = default; - /** Initialise the kernel's input, output and alpha - * - * @param[in] input0 Input tensor containing the Matrix A. Data types supported: F16/F32 - * @param[in] input1 Input tensor containing the Matrix B. Data type supported: same as @p input0 - * @param[in] input2 Input tensor containing the Matrix C (bias). Can be nullptr. Data type supported: same as @p input0 - * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 - * @param[in] alpha Weight of the matrix product - * @param[in] beta (Optional) Weight of vector C. Default value is 0. Only beta = 1 is currently supported. - * @param[in] is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel - * @param[in] reshape_info (Optional) GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped - * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy - * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication - * - */ - void configure(const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta = 0.f, - bool is_interleaved_transposed = true, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo(), bool fp_mixed_precision = false, const ActivationLayerInfo &activation_info = ActivationLayerInfo()); - /** Initialise the kernel's input, output and alpha - * - * @param[in] compile_context The compile context to be used. - * @param[in] input0 Input tensor containing the Matrix A. Data types supported: F16/F32 - * @param[in] input1 Input tensor containing the Matrix B. Data type supported: same as @p input0 - * @param[in] input2 Input tensor containing the Matrix C (bias). Can be nullptr. Data type supported: same as @p input0 - * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 - * @param[in] alpha Weight of the matrix product - * @param[in] beta (Optional) Weight of vector C. Default value is 0. Only beta = 1 is currently supported. - * @param[in] is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel - * @param[in] reshape_info (Optional) GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped - * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy - * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication - * - */ - void configure(const CLCompileContext &compile_context, const ICLTensor *input0, const ICLTensor *input1, const ICLTensor *input2, ICLTensor *output, float alpha, float beta = 0.f, - bool is_interleaved_transposed = true, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo(), bool fp_mixed_precision = false, const ActivationLayerInfo &activation_info = ActivationLayerInfo()); - /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyKernel - * - * @param[in] input0 Input tensor containing the Matrix A info. Data types supported: F16/F32 - * @param[in] input1 Input tensor containing the Matrix B info. Data type supported: same as @p input0 - * @param[in] input2 Input tensor containing the Matrix C (bias) info. Can be nullptr. Data type supported: same as @p input0 - * @param[in] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 - * @param[in] alpha Weight of the matrix product - * @param[in] beta Weight of vector C. Default value is 0. Only beta = 1 is currently supported. - * @param[in] is_interleaved_transposed True if input0 and input1 have been reshaped respectively using @ref CLGEMMReshapeLHSMatrixKernel and @ref CLGEMMReshapeRHSMatrixKernel - * @param[in] reshape_info GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped - * @param[in] gpu_target GPU Target - * @param[in] fp_mixed_precision (Optional) Use wider accumulators (32 bit instead of 16 for FP16) to improve accuracy - * @param[in] activation_info (Optional) Activation to apply after the matrix multiplication - * - * @return a status - */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output, float alpha, float beta, - bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info, GPUTarget gpu_target, bool fp_mixed_precision = false, const ActivationLayerInfo &activation_info = ActivationLayerInfo()); - - // Inherited methods overridden: - void run(const Window &window, cl::CommandQueue &queue) override; - -public: - const ICLTensor *_input0; - const ICLTensor *_input1; - const ICLTensor *_input2; - ICLTensor *_output; - bool _slide_matrix_b; - bool _reinterpret_input_as_3d; - bool _reinterpret_output_as_3d; - bool _add_bias; - bool _broadcast_bias; -}; -} // namespace arm_compute -#endif /* ARM_COMPUTE_CLGEMMMATRIXMULTIPLYKERNEL_H */ -- cgit v1.2.1