From 48b3ef89de5f21a0169d8416e3d54081f82c7bf8 Mon Sep 17 00:00:00 2001 From: Georgios Pinitas Date: Mon, 14 Oct 2019 19:03:09 +0100 Subject: COMPMID-2577: Fuse bias addition and activation in gemm assembly kernels Change-Id: I7f52112d2d05b1ea3d3f3d4b19b8eafab05d6c44 Signed-off-by: Georgios Pinitas Reviewed-on: https://review.mlplatform.org/c/2141 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Pablo Marquez --- .../functions/assembly/NEGEMMInterleavedWrapper.h | 147 --------------------- 1 file changed, 147 deletions(-) delete mode 100644 arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h (limited to 'arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h') diff --git a/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h b/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h deleted file mode 100644 index 695dcd5b6e..0000000000 --- a/arm_compute/runtime/NEON/functions/assembly/NEGEMMInterleavedWrapper.h +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Copyright (c) 2018-2019 ARM Limited. - * - * SPDX-License-Identifier: MIT - * - * Permission is hereby granted, free of charge, to any person obtaining a copy - * of this software and associated documentation files (the "Software"), to - * deal in the Software without restriction, including without limitation the - * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or - * sell copies of the Software, and to permit persons to whom the Software is - * furnished to do so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -#ifndef __ARM_COMPUTE_NEGEMMINTERLEAVEDWRAPPER_H__ -#define __ARM_COMPUTE_NEGEMMINTERLEAVEDWRAPPER_H__ - -#include "arm_compute/core/NEON/kernels/assembly/Helpers.h" -#include "arm_compute/core/NEON/kernels/assembly/INEGEMMWrapperKernel.h" -#include "arm_compute/core/NEON/kernels/assembly/NEGEMMInterleavedMatrixMultiplyWrapper.h" -#include "arm_compute/core/NEON/kernels/assembly/NEGEMMInterleavedPrepareBWrapperKernel.h" -#include "arm_compute/core/NEON/kernels/assembly/NEGEMMInterleavedTransformAWrapper.h" -#include "arm_compute/runtime/IFunction.h" -#include "arm_compute/runtime/IMemoryManager.h" -#include "arm_compute/runtime/IScheduler.h" -#include "arm_compute/runtime/IWeightsManager.h" -#include "arm_compute/runtime/MemoryGroup.h" -#include "arm_compute/runtime/Tensor.h" - -#include - -namespace arm_compute -{ -// Forward declarations -class ITensor; - -/** Buffer manager used when reshaping B on the fly - * - * The typical workflow is: - * - lock_to_reshape_if_needed() - * - If the previous lock was successful: mark_as_reshaped() - * - wait_for_reshaping() wait for the reshaping to be complete - * - mark_as_unused() once the thread is done using this given buffer. - * - * Calls for different indices might be interleaved, however the calls for a given index must always be in that order. - */ -class IBufferManager -{ -public: - /** Lock a buffer for the given index if it's available else return - * - * @param[in] index Index of the buffer to lock - * - * @return True if the buffer has been successfully locked, false if it's already reshaped / being reshaped. - */ - virtual bool lock_to_reshape_if_needed(unsigned int index) = 0; - /** Mark a buffer previously locked as reshaped - * - * @pre The thread calling this function must have locked the given buffer through lock_to_reshape_if_needed() - * - * @param[in] index Index of the buffer to mark as reshaped - */ - virtual void mark_as_reshaped(unsigned int index) = 0; - /** Block until the given buffer is marked as reshaped - * - * @param[in] index Index of the buffer - */ - virtual void wait_for_reshaping(unsigned int index) = 0; - /** Mark a reshaped buffer as unused - * - * Once all the users have marked a buffer as unused then it goes back to being free - */ - virtual void mark_as_unused(unsigned int index) = 0; - - /** Number of buffers used internally - * - * @return The number of buffers used by the manager. - */ - virtual unsigned int num_buffers() const = 0; - /** Default destructor */ - virtual ~IBufferManager() = default; -}; - -/** Equivalent to arm_gemm::GemmInterleaved but using Compute Library types. - */ -class NEGEMMInterleavedWrapper : public IFunction -{ -public: - NEGEMMInterleavedWrapper(std::shared_ptr memory_manager = nullptr, IWeightsManager *weights_manager = nullptr); - ~NEGEMMInterleavedWrapper() = default; - - NEGEMMInterleavedWrapper(const NEGEMMInterleavedWrapper &) = delete; - NEGEMMInterleavedWrapper &operator=(const NEGEMMInterleavedWrapper &) = delete; - - /** Initialise the kernel's input and output. - * - * @note The input and output tensor must have the same dimensions - * - * @param[in] a Input tensor (Matrix A) - * @param[in] b Input tensor (Matrix B) - * @param[out] c Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0. - * @param[in] alpha Scalar multiplier to apply to AB matrix product. - * @param[in] beta Scalar multiplier to apply to input C matrix before adding product. - * @param[in] gemm_info GEMM meta-data - */ - void configure(const ITensor *a, const ITensor *b, ITensor *c, float alpha, float beta, const GEMMInfo &gemm_info); - - // Inherited methods overridden: - void run() override; - void prepare() override; - -private: - MemoryGroup _memory_group; - IWeightsManager *_weights_manager; - bool _is_prepared{ false }; - bool _pretranspose_b{ false }; - Window _block_walker{}; - Window _batch_window{}; - const ITensor *_a{ nullptr }; - const ITensor *_b{ nullptr }; - ITensor *_c{ nullptr }; - Tensor _transformed_b{}; - Tensor _transformed_a{}; - Tensor _tmp_c{}; - INEGEMMWrapperKernel::Params _params{}; - BlockSizes _block_sizes{}; - std::unique_ptr _prepare_b{ nullptr }; - std::unique_ptr _transform_a{ nullptr }; - std::unique_ptr _matrix_multiply{ nullptr }; - std::unique_ptr _buffer_manager{ nullptr }; - std::vector _a_workloads{}; - std::vector _b_workloads{}; - std::vector _mm_workloads{}; - std::vector _workloads{}; - std::string _tag{}; -}; -} // namespace arm_compute -#endif /* __ARM_COMPUTE_NEGEMMINTERLEAVEDWRAPPER_H__ */ -- cgit v1.2.1