diff options
author | Moritz Pflanzer <moritz.pflanzer@arm.com> | 2017-08-31 14:56:32 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:35:24 +0000 |
commit | beabe3bdf47306d0940ddf2ddf52ada6903a0875 (patch) | |
tree | 97afa72f2d60858898ab2dadb95e4cda7176e88b /arm_compute/runtime | |
parent | 7655a67384895868c0afa72bfda9a9b2fcfdf323 (diff) | |
download | ComputeLibrary-beabe3bdf47306d0940ddf2ddf52ada6903a0875.tar.gz |
COMPMID-481: Add AArch64 GEMM
Change-Id: I34f94f99cb05f0eabafee13c5e623ee779b72360
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/83741
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
Diffstat (limited to 'arm_compute/runtime')
-rw-r--r-- | arm_compute/runtime/IScheduler.h | 26 | ||||
-rw-r--r-- | arm_compute/runtime/NEON/functions/NEConvolutionLayer.h | 34 | ||||
-rw-r--r-- | arm_compute/runtime/NEON/functions/NEGEMM.h | 22 |
3 files changed, 42 insertions, 40 deletions
diff --git a/arm_compute/runtime/IScheduler.h b/arm_compute/runtime/IScheduler.h index 6078abd06b..8918843c98 100644 --- a/arm_compute/runtime/IScheduler.h +++ b/arm_compute/runtime/IScheduler.h @@ -35,23 +35,23 @@ class IScheduler { public: /** Default constructor. */ - IScheduler() - : _target(CPUTarget::INTRINSICS) - { - } + IScheduler(); /** Destructor. */ virtual ~IScheduler() = default; + /** Sets the number of threads the scheduler will use to run the kernels. * * @param[in] num_threads If set to 0, then one thread per CPU core available on the system will be used, otherwise the number of threads specified. */ virtual void set_num_threads(unsigned int num_threads) = 0; + /** Returns the number of threads that the SingleThreadScheduler has in his pool. * * @return Number of threads available in SingleThreadScheduler. */ virtual unsigned int num_threads() const = 0; + /** Runs the kernel in the same thread as the caller synchronously. * * @param[in] kernel Kernel to execute. @@ -65,24 +65,14 @@ public: */ void set_target(CPUTarget target); - /** Return the current CPU target. + /** Get CPU info. * - * @return Target CPU. + * @return CPU info. */ - CPUTarget target() const; + CPUInfo cpu_info() const; protected: - CPUTarget _target; + CPUInfo _info{}; }; - -inline void IScheduler::set_target(CPUTarget target) -{ - _target = target; -} - -inline CPUTarget IScheduler::target() const -{ - return _target; -} } #endif /* __ARM_COMPUTE_ISCHEDULER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h index 8e040b3055..893dfa0f9d 100644 --- a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h @@ -28,6 +28,7 @@ #include "arm_compute/core/NEON/kernels/NECol2ImKernel.h" #include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h" #include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" #include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" #include "arm_compute/core/NEON/kernels/NEGEMMTranspose1xWKernel.h" @@ -37,6 +38,8 @@ #include "arm_compute/runtime/MemoryGroup.h" #include "arm_compute/runtime/Tensor.h" +#include <memory> + namespace arm_compute { class ITensor; @@ -59,6 +62,7 @@ public: * Data types supported: Same as @p weights. */ void configure(const ITensor *weights, const ITensor *biases, ITensor *output, bool transpose1xW); + // Inherited methods overridden: void run() override; @@ -82,6 +86,7 @@ class NEConvolutionLayer : public IFunction public: /** Constructor */ NEConvolutionLayer(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Set the input and output tensors. * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], @@ -96,23 +101,26 @@ public: * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input. */ void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo()); + // Inherited methods overridden: void run() override; private: - MemoryGroup _memory_group; - NEIm2ColKernel _input_im2col_kernel; - NEGEMMInterleave4x4Kernel _input_interleave_kernel; - NEConvolutionLayerReshapeWeights _reshape_weights; - NEGEMMMatrixMultiplyKernel _mm_kernel; - NECol2ImKernel _output_col2im_kernel; - Tensor _input_im2col_reshaped; - Tensor _input_interleaved_reshaped; - Tensor _weights_reshaped; - Tensor _gemm_output; - bool _has_bias; - bool _is_fully_connected_convolution; - bool _are_weights_reshaped; + MemoryGroup _memory_group; + NEIm2ColKernel _input_im2col_kernel; + NEGEMMInterleave4x4Kernel _input_interleave_kernel; + NEConvolutionLayerReshapeWeights _reshape_weights; + NEGEMMMatrixMultiplyKernel _mm_kernel; + std::unique_ptr<NEGEMMAssemblyBaseKernel> _mm_optimised_kernel; + NECol2ImKernel _output_col2im_kernel; + Tensor _input_im2col_reshaped; + Tensor _input_interleaved_reshaped; + Tensor _weights_reshaped; + Tensor _gemm_output; + Tensor _workspace; + bool _has_bias; + bool _is_fully_connected_convolution; + bool _are_weights_reshaped; }; } #endif /* __ARM_COMPUTE_NECONVOLUTIONLAYER_H__ */ diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h index b4b9e8be01..068e7c5ce8 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMM.h +++ b/arm_compute/runtime/NEON/functions/NEGEMM.h @@ -25,6 +25,7 @@ #define __ARM_COMPUTE_NEGEMM_H__ #include "arm_compute/core/NEON/kernels/NEFillBorderKernel.h" +#include "arm_compute/core/NEON/kernels/NEGEMMAssemblyBaseKernel.h" #include "arm_compute/core/NEON/kernels/NEGEMMInterleave4x4Kernel.h" #include "arm_compute/core/NEON/kernels/NEGEMMMatrixAdditionKernel.h" #include "arm_compute/core/NEON/kernels/NEGEMMMatrixMultiplyKernel.h" @@ -51,6 +52,7 @@ class NEGEMM : public IFunction public: /** Constructor */ NEGEMM(std::shared_ptr<IMemoryManager> memory_manager = nullptr); + /** Initialise the kernel's inputs, output * * @note GEMM: General Matrix Multiply - [alpha * A * B + beta * C]. @@ -69,15 +71,17 @@ public: void run() override; private: - MemoryGroup _memory_group; - NEGEMMInterleave4x4Kernel _interleave_kernel; - NEGEMMTranspose1xWKernel _transpose_kernel; - NEGEMMMatrixMultiplyKernel _mm_kernel; - NEGEMMMatrixAdditionKernel _ma_kernel; - Tensor _tmp_a; - Tensor _tmp_b; - bool _run_vector_matrix_multiplication; - bool _run_addition; + MemoryGroup _memory_group; + NEGEMMInterleave4x4Kernel _interleave_kernel; + NEGEMMTranspose1xWKernel _transpose_kernel; + NEGEMMMatrixMultiplyKernel _mm_kernel; + std::unique_ptr<NEGEMMAssemblyBaseKernel> _mm_optimised_kernel; + NEGEMMMatrixAdditionKernel _ma_kernel; + Tensor _tmp_a; + Tensor _tmp_b; + Tensor _workspace; + bool _run_vector_matrix_multiplication; + bool _run_addition; }; } #endif /*__ARM_COMPUTE_NEGEMM_H__ */ |