diff options
Diffstat (limited to 'arm_compute')
-rw-r--r-- | arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h | 18 | ||||
-rw-r--r-- | arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h | 10 | ||||
-rw-r--r-- | arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h | 18 | ||||
-rw-r--r-- | arm_compute/core/Types.h | 108 | ||||
-rw-r--r-- | arm_compute/core/utils/misc/ShapeCalculator.h | 22 | ||||
-rw-r--r-- | arm_compute/graph/Graph.h | 8 | ||||
-rw-r--r-- | arm_compute/runtime/CL/functions/CLGEMM.h | 5 |
7 files changed, 149 insertions, 40 deletions
diff --git a/arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h b/arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h index 2520eff5de..c0fef45afe 100644 --- a/arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h +++ b/arm_compute/core/CL/kernels/CLGEMMInterleave4x4Kernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -47,7 +47,7 @@ class ICLTensor; * \end{array} \right) * @f] * - * After this operation, the output matrix will have the following shape: [ height * 4, ceil(width / 4.0f) ] + * After this operation, the output matrix will have the following shape: [ height * W, ceil(width / W) ] where W = 4 * mult_interleave4x4_height */ class CLGEMMInterleave4x4Kernel : public ICLKernel { @@ -64,18 +64,20 @@ public: CLGEMMInterleave4x4Kernel &operator=(CLGEMMInterleave4x4Kernel &&) = default; /** Initialise the kernel's input and output. * - * @param[in] input Input tensor. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32 - * @param[out] output Output tensor. Data type supported: same as @p input + * @param[in] input Input tensor. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32 + * @param[out] output Output tensor. Data type supported: same as @p input + * @param[in] mult_interleave4x4_height (Optional) Multiplication factor for the height of the 4x4 interleave block */ - void configure(const ICLTensor *input, ICLTensor *output); + void configure(const ICLTensor *input, ICLTensor *output, int mult_interleave4x4_height = 1); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMInterleave4x4Kernel * - * @param[in] input Input tensor info. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32 - * @param[in] output Output tensor info which stores the interleaved matrix. Data type supported: same as @p input. + * @param[in] input Input tensor info. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32 + * @param[in] output Output tensor info which stores the interleaved matrix. Data type supported: same as @p input. + * @param[in] mult_interleave4x4_height Multiplication factor for the height of the 4x4 interleave block * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); + static Status validate(const ITensorInfo *input, const ITensorInfo *output, int mult_interleave4x4_height); // Inherited methods overridden void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h index 4e73d7eb13..7260c4a4f6 100644 --- a/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h +++ b/arm_compute/core/CL/kernels/CLGEMMMatrixMultiplyKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -58,8 +58,10 @@ public: * @param[out] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 * @param[in] alpha Weight of the matrix product * @param[in] is_interleaved_transposed (Optional) True if input0 and input1 have been reshaped respectively using @ref CLGEMMInterleave4x4Kernel and @ref CLGEMMTranspose1xWKernel + * @param[in] reshape_info (Optional) GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped + * */ - void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, float alpha, bool is_interleaved_transposed = true); + void configure(const ICLTensor *input0, const ICLTensor *input1, ICLTensor *output, float alpha, bool is_interleaved_transposed = true, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo()); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixMultiplyKernel * * @param[in] input0 Input tensor containing the Matrix A. Data types supported: QS8/QS16/F16/F32 @@ -67,11 +69,13 @@ public: * @param[in] output Output tensor to store the result of matrix multiplication. Data type supported: same as @p input0 * @param[in] alpha Weight of the matrix product * @param[in] is_interleaved_transposed True if input0 and input1 have been reshaped respectively using @ref CLGEMMInterleave4x4Kernel and @ref CLGEMMTranspose1xWKernel + * @param[in] reshape_info GEMM reshape info. If is_interleaved_transposed = true, this object must contain the information to understand how the matrix A and matrix B have been reshaped * @param[in] gpu_target GPU Target * * @return a status */ - static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, float alpha, bool is_interleaved_transposed, GPUTarget gpu_target); + static Status validate(const ITensorInfo *input0, const ITensorInfo *input1, const ITensorInfo *output, float alpha, bool is_interleaved_transposed, const GEMMReshapeInfo &reshape_info, + GPUTarget gpu_target); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h b/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h index 8721643c1e..9a3069eab6 100644 --- a/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h +++ b/arm_compute/core/CL/kernels/CLGEMMTranspose1xWKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -62,7 +62,7 @@ class ICLTensor; * \end{array} \right) * @f] * - * @note The output matrix will have the following shape: [ height * W, ceil(width / W) ], where W = (16 / element size of the tensor) + * @note The output matrix will have the following shape: [ height * W, ceil(width / W) ], where W = (16 / element size of the tensor) * mult_transpose1xW_width * */ class CLGEMMTranspose1xWKernel : public ICLSimple2DKernel @@ -70,18 +70,20 @@ class CLGEMMTranspose1xWKernel : public ICLSimple2DKernel public: /** Initialise the kernel's input and output. * - * @param[in] input Input tensor. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32 - * @param[out] output Output tensor. Data type supported: same as @p input + * @param[in] input Input tensor. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32 + * @param[out] output Output tensor. Data type supported: same as @p input + * @param[in] mult_transpose1xW_width (Optional) Multiplication factor for the width of the 1xW transposed block */ - void configure(const ICLTensor *input, ICLTensor *output); + void configure(const ICLTensor *input, ICLTensor *output, int mult_transpose1xW_width = 1); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMTranspose1xWKernel * - * @param[in] input Input tensor. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32 - * @param[in] output Output tensor. Data type supported: same as @p input. + * @param[in] input Input tensor. Data types supported: U8/S8/QS8/QASYMM8/U16/S16/QS16/F16/U32/S32/F32 + * @param[in] output Output tensor. Data type supported: same as @p input. + * @param[in] mult_transpose1xW_width Multiplication factor for the width of the 1xW transposed block * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output); + static Status validate(const ITensorInfo *input, const ITensorInfo *output, int mult_transpose1xW_width); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/Types.h b/arm_compute/core/Types.h index 5402e358b5..5197000bf9 100644 --- a/arm_compute/core/Types.h +++ b/arm_compute/core/Types.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2018 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -824,13 +824,95 @@ private: const unsigned int _num_kernels; }; -/** GEMM Information class. This class stores the necessary information to compute GEMM functions */ +/** GEMM reshape information class. This class stores the necessary information about matrix A and matrix B reshape. + * + * The matrix A can only be reshaped through @ref CLGEMMInterleave4x4Kernel or @ref NEGEMMInterleave4x4Kernel or @ref GCGEMMInterleave4x4Kernel + * Note: Optionally just for @ref CLGEMMInterleave4x4Kernel is it possible to set mult_interleave4x4_height, the multiplication factor for the height of the 4x4 interleaved block + * + * The matrix B can only be reshaped through @ref CLGEMMTranspose1xWKernel or @ref NEGEMMTranspose1xWKernel or @ref GCGEMMTranspose1xWKernel + * Note: Optionally just for @ref CLGEMMTranspose1xWKernel is it possible to set mult_transpose1xW_width, the multiplication factor for the width of the 1xW transposed block + * + */ +class GEMMReshapeInfo final +{ +public: + /** Default constructor */ + GEMMReshapeInfo() + : _m(1), _n(1), _k(1), _mult_transpose1xW_width(1), _mult_interleave4x4_height(1) + { + } + /** Constructor + * + * @param[in] m Number of matrix A rows + * @param[in] n Number of matrix B columns + * @param[in] k Number of matrix A columns or matrix B rows + * @param[in] mult_transpose1xW_width (Optional) Multiplication factor for the width of the 1xW transposed block + * @param[in] mult_interleave4x4_height (Optional) Multiplication factor for the height of the 4x4 interleaved block + */ + GEMMReshapeInfo(int m, int n, int k, int mult_transpose1xW_width = 1, int mult_interleave4x4_height = 1) + : _m(m), _n(n), _k(k), _mult_transpose1xW_width(mult_transpose1xW_width), _mult_interleave4x4_height(mult_interleave4x4_height) + { + } + /** Number of matrix A rows + * + * @return the number of matrix A rows + */ + int m() const + { + return _m; + } + /** Number of matrix B columns + * + * @return the number of matrix B columns + */ + int n() const + { + return _n; + } + /** Number of matrix A columns or matrix B rows + * + * @return the number of matrix A columns or matrix B rows + */ + int k() const + { + return _k; + } + /** Multiplication factor for the width of the 1xW transposed block + * + * @return the multiplication factor for the width of the 1xW transposed block + */ + int mult_transpose1xW_width() const + { + return _mult_transpose1xW_width; + } + /** Multiplication factor for the height of the 4x4 interleaved block + * + * @return the multiplication factor for the height of the 4x4 interleaved block + */ + int mult_interleave4x4_height() const + { + return _mult_interleave4x4_height; + } + +private: + const int _m; + const int _n; + const int _k; + const int _mult_transpose1xW_width; + const int _mult_interleave4x4_height; +}; + +/** GEMM information class. This class stores the necessary information to compute GEMM functions + * + * This object also contains the information about how matrix A and matrix B have been reshaped + * + */ class GEMMInfo { public: /** Default constructor */ GEMMInfo() - : _is_a_reshaped(false), _is_b_reshaped(false), _reshape_b_only_on_first_run(false) + : _is_a_reshaped(false), _is_b_reshaped(false), _reshape_b_only_on_first_run(false), _reshape_info() { } /** Constructor @@ -838,9 +920,10 @@ public: * @param[in] is_a_reshaped True if the matrix A has been reshaped * @param[in] is_b_reshaped True if the matrix B has been reshaped * @param[in] reshape_b_only_on_first_run Reshape matrix B only for the first run + * @param[in] reshape_info (Optional) GEMM reshape information object */ - GEMMInfo(bool is_a_reshaped, bool is_b_reshaped, bool reshape_b_only_on_first_run) - : _is_a_reshaped(is_a_reshaped), _is_b_reshaped(is_b_reshaped), _reshape_b_only_on_first_run(reshape_b_only_on_first_run) + GEMMInfo(bool is_a_reshaped, bool is_b_reshaped, bool reshape_b_only_on_first_run, const GEMMReshapeInfo &reshape_info = GEMMReshapeInfo()) + : _is_a_reshaped(is_a_reshaped), _is_b_reshaped(is_b_reshaped), _reshape_b_only_on_first_run(reshape_b_only_on_first_run), _reshape_info(reshape_info) { } /** Flag which specifies if the matrix A has been reshaped @@ -869,11 +952,20 @@ public: { return _reshape_b_only_on_first_run; }; + /** GEMMReshapeInfo object which stores the necessary information to understand how the matrix A and matrix B have been reshaped + * + * @return the GEMMReshapeInfo object + */ + const GEMMReshapeInfo &reshape_info() const + { + return _reshape_info; + } private: - const bool _is_a_reshaped; - const bool _is_b_reshaped; - const bool _reshape_b_only_on_first_run; + const bool _is_a_reshaped; + const bool _is_b_reshaped; + const bool _reshape_b_only_on_first_run; + GEMMReshapeInfo _reshape_info; }; /** IO formatting information class*/ diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h index 61834b88a9..6ecfdf0323 100644 --- a/arm_compute/core/utils/misc/ShapeCalculator.h +++ b/arm_compute/core/utils/misc/ShapeCalculator.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017, 2018 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -39,12 +39,14 @@ inline TensorShape compute_permutation_output_shape(const ITensorInfo &input, co permute(output_shape, perm); return output_shape; } -inline TensorShape compute_interleaved_shape(const ITensorInfo &a) +inline TensorShape compute_interleaved_shape(const ITensorInfo &a, int mult_interleave4x4_height = 1) { - // The interleaved output matrix will have the following shape: [ a_height * 4, ceil(a_width / 4.0f) ] + // The interleaved output matrix will have the following shape: [ a_height * W, ceil(a_width / W) ] where W = 4 * mult_interleave4x4_height + ARM_COMPUTE_ERROR_ON(mult_interleave4x4_height < 1); + const int interleave_width = 4 * mult_interleave4x4_height; TensorShape shape_interleaved_a{ a.tensor_shape() }; - shape_interleaved_a.set(0, a.dimension(0) * 4); - shape_interleaved_a.set(1, std::ceil(a.dimension(1) / 4.f)); + shape_interleaved_a.set(0, a.dimension(0) * interleave_width); + shape_interleaved_a.set(1, std::ceil(a.dimension(1) / static_cast<float>(interleave_width))); return shape_interleaved_a; } @@ -57,12 +59,14 @@ inline TensorShape compute_transpose1xW_shape(const ITensorInfo &b) return shape_transposed1xW_b; } -inline TensorShape compute_transpose1xW_with_element_size_shape(const ITensorInfo &b) +inline TensorShape compute_transpose1xW_with_element_size_shape(const ITensorInfo &b, int mult_transpose1xW_width = 1) { - // The transpose1xW output matrix will have the following shape: - // [ b_height * (16 / element_size), ceil(b_width / (16.0f / element_size) ] + // Note: mult_transpose1xW_width expresses the number of chunks with size 1x(W) we want to store on the same row + // The transpose1xW output matrix will have the following shape: + // [ b_height * W, ceil(b_width / W) ] where W = (16 / element size of the tensor) * mult_transpose1xW_width + ARM_COMPUTE_ERROR_ON(mult_transpose1xW_width < 1); TensorShape shape_transposed1xW_b{ b.tensor_shape() }; - const size_t transpose_width = 16 / b.element_size(); + const size_t transpose_width = (16 / b.element_size()) * mult_transpose1xW_width; shape_transposed1xW_b.set(0, b.dimension(1) * transpose_width); shape_transposed1xW_b.set(1, static_cast<size_t>(std::ceil(b.dimension(0) / static_cast<float>(transpose_width)))); diff --git a/arm_compute/graph/Graph.h b/arm_compute/graph/Graph.h index ab1d8b8866..853b90df82 100644 --- a/arm_compute/graph/Graph.h +++ b/arm_compute/graph/Graph.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -24,6 +24,7 @@ #ifndef __ARM_COMPUTE_GRAPH_GRAPH_H__ #define __ARM_COMPUTE_GRAPH_GRAPH_H__ +#include "arm_compute/core/CL/CLTypes.h" #include "arm_compute/graph/INode.h" #include "arm_compute/graph/ITensorObject.h" #include "arm_compute/graph/SubTensor.h" @@ -67,9 +68,12 @@ public: * @param[in] tensor Tensor to add */ void add_tensor_object(std::unique_ptr<ITensorObject> tensor); - /** Finalizes the current node's configuration + /** Check if the OpenCL target is available */ static bool opencl_is_available(); + /** Returns the GPU target + */ + static GPUTarget gpu_target(); /** Manually sets the output of the current node * * @param[in] tmp Output info to set diff --git a/arm_compute/runtime/CL/functions/CLGEMM.h b/arm_compute/runtime/CL/functions/CLGEMM.h index bf41226bda..0f144915d7 100644 --- a/arm_compute/runtime/CL/functions/CLGEMM.h +++ b/arm_compute/runtime/CL/functions/CLGEMM.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016, 2017 ARM Limited. + * Copyright (c) 2016-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -68,7 +68,8 @@ public: * @param[in] alpha Weight of the matrix product * @param[in] beta Weight of matrix C * @param[in] gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and - * if the reshape of matrix B should happen only for the first run + * if the reshape of matrix B should happen only for the first run. GEMMInfo also contains information about the reshaping + * in case matrix A and matrix B have been already transformed. */ void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info = GEMMInfo()); |