diff options
Diffstat (limited to 'arm_compute/runtime/NEON/functions/NEGEMM.h')
-rw-r--r-- | arm_compute/runtime/NEON/functions/NEGEMM.h | 104 |
1 files changed, 48 insertions, 56 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h index 645ab56417..29650a5eca 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMM.h +++ b/arm_compute/runtime/NEON/functions/NEGEMM.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2020 Arm Limited. + * Copyright (c) 2017-2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -24,38 +24,18 @@ #ifndef ARM_COMPUTE_NEGEMM_H #define ARM_COMPUTE_NEGEMM_H +#include "arm_compute/function_info/GEMMInfo.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/IWeightsManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/NEON/functions/NEActivationLayer.h" -#include "arm_compute/runtime/NEON/functions/NEArithmeticAddition.h" -#include "arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h" -#include "arm_compute/runtime/Tensor.h" #include <memory> namespace arm_compute { -class NEGEMMInterleave4x4Kernel; -class NEGEMMMatrixAdditionKernel; -class NEGEMMMatrixMultiplyKernel; -class NEGEMMTranspose1xWKernel; -/** Basic function to execute GEMM on NEON. This function calls the following NEON kernels: +/** Basic function to execute GEMM. This function calls the following kernels: * - * If optimized assembly is available: - * -# @ref NEGEMMAssemblyDispatch - * -# @ref NEActivationLayer (if alpha != 1.0) - * Else: - * -# @ref NEGEMMInterleave4x4Kernel (if the output tensor is a matrix) - * -# @ref NEGEMMTranspose1xWKernel (if the output tensor is a matrix) - * -# @ref NEGEMMMatrixMultiplyKernel - * In both cases: - * -# @ref NEGEMMMatrixAdditionKernel (if c != nullptr and beta != 0.0 and is not reshaped once) - * Else: - * -# @ref NEArithmeticAdditionKernel (if c != nullptr and is reshaped once and not optimized assembly in place) - * - * -# @ref NEActivationLayer (if activation is specified in GEMMInfo) + * -# @ref cpu::CpuGemm */ class NEGEMM : public IFunction { @@ -74,9 +54,21 @@ public: ~NEGEMM(); /** Initialise the kernel's inputs, output * + * Valid data layouts: + * - All + * + * Valid data type configurations: + * |src0 |src1 |src2 |dst | + * |:------------|:-----------|:---------|:--------------| + * |F32 |F32 |F32 |F32 | + * |F16 |F16 |F16 |F16 | + * |BFLOAT16 |BFLOAT16 |BFLOAT16 |BFLOAT16 | + * * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C]. * @note GEMM: The tensors a, b, c, d must have the same data type. You should not mix data types when calling this function. * + * @note Batched GEMM only supports broadcasting cases where RHS rank < LHS rank but not the other way around + * * @param[in] a First input tensor (Matrix A or Vector A). Data type supported: BFLOAT16/F16/F32 * @param[in] b Second input tensor (Matrix B). Data type supported: same as @p a * @param[in] c Third input tensor (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a @@ -86,49 +78,49 @@ public: * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and * if the reshape of matrix B should happen only for the first run */ - void configure(const ITensor *a, const ITensor *b, const ITensor *c, ITensor *d, float alpha, float beta, const GEMMInfo &gemm_info = GEMMInfo()); + void configure(const ITensor *a, + const ITensor *b, + const ITensor *c, + ITensor *d, + float alpha, + float beta, + const GEMMInfo &gemm_info = GEMMInfo()); /** Static function to check if given info will lead to a valid configuration of @ref NEGEMM. * - * @param[in] a First input tensor info (Matrix or Vector A). Data types supported: BFLOAT16/F16/F32 - * @param[in] b Second input tensor info (Matrix B). Data type supported: same as @p a. - * @param[in] c Third input tensor info (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a. - * @param[out] output Output tensor info. Data type supported: same as @p a - * @param[in] alpha Weight of the matrix product - * @param[in] beta Weight of matrix C - * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and - * if the reshape of matrix B should happen only for the first run + * Similar to @ref NEGEMM::configure() + * + * @return a status + */ + static Status validate(const ITensorInfo *a, + const ITensorInfo *b, + const ITensorInfo *c, + const ITensorInfo *output, + float alpha, + float beta, + const GEMMInfo &gemm_info = GEMMInfo()); + + /** Static function that queries whether there exists fixed-format kernel and if it exists it will return in the first argument in what format + * weights are expected to be reshaped as defined by WeightFormat class. Apart from the first argument the rest of the arguments are the same + * as in @ref NEGEMM::validate() except that all arguments are required. * * @return a status */ - static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ITensorInfo *c, const ITensorInfo *output, float alpha, float beta, const GEMMInfo &gemm_info = GEMMInfo()); + static Status has_opt_impl(arm_compute::WeightFormat &expected_weight_format, + const ITensorInfo *a, + const ITensorInfo *b, + const ITensorInfo *c, + const ITensorInfo *output, + float alpha, + float beta, + const GEMMInfo &gemm_info = GEMMInfo()); // Inherited methods overridden: void run() override; void prepare() override; private: - MemoryGroup _memory_group; - IWeightsManager *_weights_manager; - std::unique_ptr<NEGEMMInterleave4x4Kernel> _interleave_kernel; - std::unique_ptr<NEGEMMTranspose1xWKernel> _transpose_kernel; - std::unique_ptr<NEGEMMMatrixMultiplyKernel> _mm_kernel; - NEGEMMAssemblyDispatch _asm_glue; - std::unique_ptr<NEGEMMMatrixAdditionKernel> _ma_kernel; - NEActivationLayer _alpha_scale_func; - NEArithmeticAddition _add_bias; - NEActivationLayer _activation_func; - - Tensor _tmp_a; - Tensor _tmp_b; - Tensor _tmp_d; - const ITensor *_original_b; - bool _run_vector_matrix_multiplication; - bool _run_alpha_scale; - bool _run_addition; - bool _run_bias_addition; - bool _run_activation; - bool _reshape_b_only_on_first_run; - bool _is_prepared; + struct Impl; + std::unique_ptr<Impl> _impl; }; } // namespace arm_compute #endif /*ARM_COMPUTE_NEGEMM_H */ |