From 7cd26d4a1b14bc4bf7c61496803416ab3d84791f Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Wed, 9 Jan 2019 18:35:17 +0000 Subject: COMPMID-1867: Add NEON/SVE GEMM Hybrid kernels. Change-Id: Ib40a9921e7f9a6a8be6c38872d6b3a0f24ed0cd3 Reviewed-on: https://review.mlplatform.org/515 Reviewed-by: Anthony Barbier Tested-by: Arm Jenkins --- .../NEON/functions/assembly/NEGEMMInterleavedWrapper.h | 17 +++++++---------- 1 file changed, 7 insertions(+), 10 deletions(-) (limited to 'arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h') diff --git a/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h b/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h index 26236ffb35..3ccfbc512b 100644 --- a/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h +++ b/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2018 ARM Limited. + * Copyright (c) 2018-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -26,6 +26,9 @@ #include "arm_compute/core/NEON/kernels/assembly/Helpers.h" #include "arm_compute/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h" +#include "arm_compute/core/NEON/kernels/assembly/NEGEMMInterleavedMatrixMultiplyWrapper.h" +#include "arm_compute/core/NEON/kernels/assembly/NEGEMMInterleavedPrepareBWrapperKernel.h" +#include "arm_compute/core/NEON/kernels/assembly/NEGEMMInterleavedTransformAWrapper.h" #include "arm_compute/runtime/IFunction.h" #include "arm_compute/runtime/IMemoryManager.h" #include "arm_compute/runtime/IScheduler.h" @@ -36,13 +39,8 @@ namespace arm_compute { +// Forward declarations class ITensor; -class NEGEMMInterleavedPrepareBWrapperKernel; -class PrepareBWorkload; -class TransformAWorkload; -class MatrixMultiplyWorkload; -class NEGEMMInterleavedTransformAWrapper; -class NEGEMMInterleavedMatrixMultiplyWrapper; /** Buffer manager used when reshaping B on the fly * @@ -97,6 +95,7 @@ class NEGEMMInterleavedWrapper 
: public IFunction { public: NEGEMMInterleavedWrapper(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + ~NEGEMMInterleavedWrapper() = default; NEGEMMInterleavedWrapper(const NEGEMMInterleavedWrapper &) = delete; NEGEMMInterleavedWrapper &operator=(const NEGEMMInterleavedWrapper &) = delete; @@ -111,9 +110,8 @@ public: * @param[in] alpha Scalar multiplier to apply to AB matrix product. * @param[in] beta Scalar multiplier to apply to input C matrix before adding product. * @param[in] pretranspose_b If true, pretranspose B once during the prepare() stage instead of on the fly every time. - * @param[in] use_dot (Optional) If the input's type is U8/S8/QASYMM8 then use the dot product flavour or the matrix multiply routine. (Must be supported by the hardware). */ - void configure(const ITensor *a, const ITensor *b, ITensor *c, float alpha, float beta, bool pretranspose_b, bool use_dot = false); + void configure(const ITensor *a, const ITensor *b, ITensor *c, float alpha, float beta, bool pretranspose_b); // Inherited methods overridden: void run() override; @@ -143,6 +141,5 @@ private: std::vector<IScheduler::Workload> _workloads{}; std::string _tag{}; }; - } // namespace arm_compute #endif /* __ARM_COMPUTE_NEGEMMINTERLEAVEDWRAPPER_H__ */ -- cgit v1.2.1