diff options
author | Anthony Barbier <anthony.barbier@arm.com> | 2018-07-23 16:42:59 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:54:54 +0000 |
commit | 3d677ccee046cd384abf2142f323f8e9e7a4834f (patch) | |
tree | 2e0d86a1b2438cb94386c55d1bc89b3e1061214c /arm_compute/runtime | |
parent | 597a85666a84c9a9414264966651551564b79299 (diff) | |
download | ComputeLibrary-3d677ccee046cd384abf2142f323f8e9e7a4834f.tar.gz |
COMPMID-1406: Refactor gemm_interleaved to use our own types and scheduler
- Ported PrepareB kernel from gemm_interleave
- Ported TransformA feature from gemm_interleave
- Allocate reshaped a and b buffers
- Added memory_manager / memory_group
- MatrixMultiply kernel
- Interleave kernels execution.
- Fixed a few bugs: all nightly Convolution tests passing for threads=1
and threads=4
- Added Doxygen documentation and comments in the code
- Added support for all supported data types
Change-Id: Iffa1c09fda0bb9c61213bb83524d5a48e7ecb03c
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/141281
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Georgios Pinitas <georgios.pinitas@arm.com>
Diffstat (limited to 'arm_compute/runtime')
-rw-r--r-- | arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h | 5 | ||||
-rw-r--r-- | arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h | 99 |
2 files changed, 102 insertions, 2 deletions
diff --git a/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h b/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h index 382ef1caba..2fc2cf4a99 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMAssemblyDispatch.h @@ -77,8 +77,9 @@ private: bool create_function(arm_gemm::GemmMethod method, const ITensor *a, const ITensor *b, ITensor *d, float alpha, float beta, bool pretranspose_hint); /** Interface for the arm_gemm fallback */ - std::unique_ptr<IFallback> _arm_gemm; - MemoryGroup _memory_group; /**< Function memory group */ + std::unique_ptr<IFallback> _arm_gemm; + MemoryGroup _memory_group; /**< Function memory group */ + std::shared_ptr<IMemoryManager> _memory_manager; /**< Copy of the memory manager used to create the memory group to be used when instantiating new functions */ public: /** If supported create an ACL function else fallback to the arm_gemm function. * diff --git a/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h b/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h new file mode 100644 index 0000000000..cead71ed67 --- /dev/null +++ b/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h @@ -0,0 +1,99 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef __ARM_COMPUTE_NEGEMMINTERLEAVEDWRAPPER_H__ +#define __ARM_COMPUTE_NEGEMMINTERLEAVEDWRAPPER_H__ + +#include "arm_compute/core/NEON/kernels/assembly/Helpers.h" +#include "arm_compute/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h" +#include "arm_compute/runtime/IFunction.h" +#include "arm_compute/runtime/IMemoryManager.h" +#include "arm_compute/runtime/IScheduler.h" +#include "arm_compute/runtime/MemoryGroup.h" +#include "arm_compute/runtime/Tensor.h" + +#include <memory> + +namespace arm_compute +{ +class ITensor; +class NEGEMMInterleavedPrepareBWrapperKernel; +class PrepareBWorkload; +class TransformAWorkload; +class MatrixMultiplyWorkload; +class NEGEMMInterleavedTransformAWrapper; +class NEGEMMInterleavedMatrixMultiplyWrapper; + +/** Equivalent to arm_gemm::GemmInterleaved but using Compute Library types. + */ +class NEGEMMInterleavedWrapper : public IFunction +{ +public: + NEGEMMInterleavedWrapper(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + + NEGEMMInterleavedWrapper(const NEGEMMInterleavedWrapper &) = delete; + NEGEMMInterleavedWrapper &operator=(const NEGEMMInterleavedWrapper &) = delete; + + /** Initialise the kernel's input and output. + * + * @note The input and output tensor must have the same dimensions + * + * @param[in] a Input tensor (Matrix A) + * @param[in] b Input tensor (Matrix B) + * @param[out] c Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. 
+ * @param[in] alpha Scalar multiplier to apply to AB matrix product. + * @param[in] beta Scalar multiplier to apply to input C matrix before adding product. + * @param[in] pretranspose_b If true, pretranspose B once during the prepare() stage instead of on the fly every time. + * @param[in] use_dot (Optional) If the input's type is U8/S8/QASYMM8 then use the dot product flavour or the matrix multiply routine. (Must be supported by the hardware). + */ + void configure(const ITensor *a, const ITensor *b, ITensor *c, float alpha, float beta, bool pretranspose_b, bool use_dot = false); + + // Inherited methods overridden: + void run() override; + void prepare() override; + +private: + MemoryGroup _memory_group; + bool _is_prepared{ false }; + bool _pretranspose_b{ false }; + Window _block_walker{}; + Window _batch_window{}; + const ITensor *_a{ nullptr }; + const ITensor *_b{ nullptr }; + ITensor *_c{ nullptr }; + Tensor _transformed_b{}; + Tensor _transformed_a{}; + Tensor _tmp_c{}; + INEGEMMWrapperKernel::Params _params{}; + BlockSizes _block_sizes{}; + std::unique_ptr<NEGEMMInterleavedPrepareBWrapperKernel> _prepare_b{ nullptr }; + std::unique_ptr<NEGEMMInterleavedTransformAWrapper> _transform_a{ nullptr }; + std::unique_ptr<NEGEMMInterleavedMatrixMultiplyWrapper> _matrix_multiply{ nullptr }; + std::vector<TransformAWorkload> _a_workloads{}; + std::vector<PrepareBWorkload> _b_workloads{}; + std::vector<MatrixMultiplyWorkload> _mm_workloads{}; + std::vector<IScheduler::Workload> _workloads{}; +}; + +} // namespace arm_compute +#endif /* __ARM_COMPUTE_NEGEMMINTERLEAVEDWRAPPER_H__ */ |