From 3d677ccee046cd384abf2142f323f8e9e7a4834f Mon Sep 17 00:00:00 2001 From: Anthony Barbier Date: Mon, 23 Jul 2018 16:42:59 +0100 Subject: COMPMID-1406: Refactor gemm_interleaved to use our own types and scheduler - Ported PrepareB kernel from gemm_interleave - Ported TransformA feature from gemm_interleave - Allocate reshaped a and b buffers - Added memory_manager / memory_group - MatrixMultiply kernel - Interleave kernels execution. - Fixed a few bugs: all nightly Convolution tests passing for threads=1 and threads=4 - Added Doxygen documentations and comments in the code - Added support for all data types supported Change-Id: Iffa1c09fda0bb9c61213bb83524d5a48e7ecb03c Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/141281 Tested-by: Jenkins Reviewed-by: Georgios Pinitas --- .../functions/assembly/NEGEMMInterleavedWrapper.h | 99 ++++++++++++++++++++++ 1 file changed, 99 insertions(+) create mode 100644 arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h (limited to 'arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h') diff --git a/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h b/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h new file mode 100644 index 0000000000..cead71ed67 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2018 ARM Limited. 
 *
 * SPDX-License-Identifier: MIT
 *
 * Permission is hereby granted, free of charge, to any person obtaining a copy
 * of this software and associated documentation files (the "Software"), to
 * deal in the Software without restriction, including without limitation the
 * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
 * sell copies of the Software, and to permit persons to whom the Software is
 * furnished to do so, subject to the following conditions:
 *
 * The above copyright notice and this permission notice shall be included in all
 * copies or substantial portions of the Software.
 *
 * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
 * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
 * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
 * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
 * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
 * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
 * SOFTWARE.
 */
#ifndef __ARM_COMPUTE_NEGEMMINTERLEAVEDWRAPPER_H__
#define __ARM_COMPUTE_NEGEMMINTERLEAVEDWRAPPER_H__

#include "arm_compute/core/NEON/kernels/assembly/Helpers.h"
#include "arm_compute/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h"
#include "arm_compute/runtime/IFunction.h"
#include "arm_compute/runtime/IMemoryManager.h"
#include "arm_compute/runtime/IScheduler.h"
#include "arm_compute/runtime/MemoryGroup.h"
#include "arm_compute/runtime/Tensor.h"

#include <memory>
#include <vector>

namespace arm_compute
{
class ITensor;
class NEGEMMInterleavedPrepareBWrapperKernel;
class PrepareBWorkload;
class TransformAWorkload;
class MatrixMultiplyWorkload;
class NEGEMMInterleavedTransformAWrapper;
class NEGEMMInterleavedMatrixMultiplyWrapper;

/** Equivalent to arm_gemm::GemmInterleaved but using Compute Library types.
+ */ +class NEGEMMInterleavedWrapper : public IFunction +{ +public: + NEGEMMInterleavedWrapper(std::shared_ptr memory_manager = nullptr); + + NEGEMMInterleavedWrapper(const NEGEMMInterleavedWrapper &) = delete; + NEGEMMInterleavedWrapper &operator=(const NEGEMMInterleavedWrapper &) = delete; + + /** Initialise the kernel's input and output. + * + * @note The input and output tensor must have the same dimensions + * + * @param[in] a Input tensor (Matrix A) + * @param[in] b Input tensor (Matrix B) + * @param[out] c Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. + * @param[in] alpha Scalar multiplier to apply to AB matrix product. + * @param[in] beta Scalar multiplier to apply to input C matrix before adding product. + * @param[in] pretranspose_b If true, pretranspose B once during the prepare() stage instead of on the fly every time. + * @param[in] use_dot (Optional) If the input's type is U8/S8/QASYMM8 then use the dot product flavour or the matrix multiply routine. (Must be supported by the hardware). 
+ */ + void configure(const ITensor *a, const ITensor *b, ITensor *c, float alpha, float beta, bool pretranspose_b, bool use_dot = false); + + // Inherited methods overridden: + void run() override; + void prepare() override; + +private: + MemoryGroup _memory_group; + bool _is_prepared{ false }; + bool _pretranspose_b{ false }; + Window _block_walker{}; + Window _batch_window{}; + const ITensor *_a{ nullptr }; + const ITensor *_b{ nullptr }; + ITensor *_c{ nullptr }; + Tensor _transformed_b{}; + Tensor _transformed_a{}; + Tensor _tmp_c{}; + INEGEMMWrapperKernel::Params _params{}; + BlockSizes _block_sizes{}; + std::unique_ptr _prepare_b{ nullptr }; + std::unique_ptr _transform_a{ nullptr }; + std::unique_ptr _matrix_multiply{ nullptr }; + std::vector _a_workloads{}; + std::vector _b_workloads{}; + std::vector _mm_workloads{}; + std::vector _workloads{}; +}; + +} // namespace arm_compute +#endif /* __ARM_COMPUTE_NEGEMMINTERLEAVEDWRAPPER_H__ */ -- cgit v1.2.1