Diffstat (limited to 'arm_compute')
-rw-r--r--   arm_compute/core/CL/kernels/CLCol2ImKernel.h               | 12
-rw-r--r--   arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h   | 11
-rw-r--r--   arm_compute/core/CL/kernels/CLIm2ColKernel.h               |  6
-rw-r--r--   arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h       | 15
-rw-r--r--   arm_compute/core/utils/misc/ShapeCalculator.h              | 20
-rw-r--r--   arm_compute/runtime/CL/functions/CLGEMM.h                  | 14
-rw-r--r--   arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h  | 56
-rw-r--r--   arm_compute/runtime/NEON/functions/NEGEMM.h                |  2
8 files changed, 114 insertions, 22 deletions
diff --git a/arm_compute/core/CL/kernels/CLCol2ImKernel.h b/arm_compute/core/CL/kernels/CLCol2ImKernel.h
index bd86da1b5e..24d0fdd914 100644
--- a/arm_compute/core/CL/kernels/CLCol2ImKernel.h
+++ b/arm_compute/core/CL/kernels/CLCol2ImKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -72,6 +72,16 @@ public:
      * @param[in]  convolved_dims Output convolved dimensions.
      */
     void configure(const ICLTensor *input, ICLTensor *output, std::pair<unsigned int, unsigned int> convolved_dims);
+    /** Static function to check if given info will lead to a valid configuration of @ref CLCol2ImKernel
+     *
+     * @param[in] input          The input tensor to convert. Data types supported: QS8/QS16/QASYMM8/F16/F32
+     * @param[in] output         The output tensor. 3 lower dimensions represent a single output [width, height, OFM],
+     *                           while the rest represent batch of outputs. Data types supported: Same as @p input
+     * @param[in] convolved_dims Output convolved dimensions.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, std::pair<unsigned int, unsigned int> convolved_dims);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;

diff --git a/arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h b/arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h
index 8f73d8c2c3..dc84a40ca8 100644
--- a/arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h
+++ b/arm_compute/core/CL/kernels/CLGEMMMatrixAdditionKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -57,6 +57,15 @@ public:
      * @param[in]  beta   Weight of matrix C
      */
     void configure(const ICLTensor *input, ICLTensor *output, float beta);
+    /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMMatrixAdditionKernel.
+     *
+     * @param[in] input  Input tensor (Matrix C). Data types supported: QS8/QS16/F16/F32
+     * @param[in] output Output tensor. If this kernel is used to finalize the GEMM result (alpha * AB + beta * C), output must contain the result obtained by @ref CLGEMMMatrixMultiplyKernel. Data type supported: same as @p input
+     * @param[in] beta   Weight of matrix C
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *output, const float beta);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
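Both kernels gain the same static validate() entry point used across the library: the checks run against ITensorInfo descriptors only, so a configuration can be rejected before any OpenCL kernel is compiled or device memory is allocated. A minimal sketch of the calling pattern; the shapes here are invented for illustration (a GEMM result of 32 feature maps by 12*12 positions, scattered back into a 12x12x32 volume) and are not part of this commit:

#include "arm_compute/core/CL/kernels/CLCol2ImKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"

#include <utility>

using namespace arm_compute;

bool col2im_config_is_valid()
{
    // GEMM output: OFM = 32 on the x axis, 12 * 12 convolved positions on the y axis.
    const TensorInfo input(TensorShape(32U, 144U), 1, DataType::F32);
    // Col2im rearranges it into a [width, height, OFM] volume.
    const TensorInfo output(TensorShape(12U, 12U, 32U), 1, DataType::F32);

    const Status status = CLCol2ImKernel::validate(&input, &output, std::make_pair(12U, 12U));
    return bool(status); // true when configure() would succeed on matching CLTensors
}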
diff --git a/arm_compute/core/CL/kernels/CLIm2ColKernel.h b/arm_compute/core/CL/kernels/CLIm2ColKernel.h
index e38e7e8a49..1ad302eedb 100644
--- a/arm_compute/core/CL/kernels/CLIm2ColKernel.h
+++ b/arm_compute/core/CL/kernels/CLIm2ColKernel.h
@@ -77,9 +77,6 @@ public:
      * @param[in] has_bias    In case biases are provided expands the matrix with 1.
      */
     void configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias);
-
-    // Inherited methods overridden:
-    void run(const Window &window, cl::CommandQueue &queue) override;
     /** Static function to check if given info will lead to a valid configuration of @ref CLIm2ColKernel
      *
      * @param[in] input       The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM],
@@ -94,6 +91,9 @@ public:
      */
     static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias);
 
+    // Inherited methods overridden:
+    void run(const Window &window, cl::CommandQueue &queue) override;
+
 private:
     /** Run the reshape kernel optimised for the special case (stride is 1, padding is 0 and kernel's low 3 dimensions are same as input)
      *

diff --git a/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h b/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h
index 6c84ded49e..b9ede12e3d 100644
--- a/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h
+++ b/arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -43,7 +43,6 @@ public:
     CLWeightsReshapeKernel &operator=(CLWeightsReshapeKernel &&) = default;
     /** Default destructor */
     ~CLWeightsReshapeKernel() = default;
-
     /** Set the input and output of the kernel.
      *
      * @param[in] input   The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
@@ -54,6 +53,18 @@ public:
      * @param[out] output The output tensor. Should be a 2D Tensor. Data types supported: Same as @p input
      */
     void configure(const ICLTensor *input, const ICLTensor *biases, ICLTensor *output);
+    /** Static function to check if given info will lead to a valid configuration of @ref CLWeightsReshapeKernel
+     *
+     * @param[in] input  The input tensor to convert. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM] if shared,
+     *                   and 5D tensor with dimensions [kernel_x, kernel_y, IFM, OFM, num_patches] if unshared. Data types supported: QS8/QS16/QASYMM8/F16/F32
+     * @param[in] biases The shared biases tensor to append. Bias is 1D tensor with dimensions [OFM] if shared and 2D tensor with
+     *                   dimensions [OFM, num_patches] if unshared. Data types supported: Same as @p input
+     *                   @warning Appending biases to weights reshaped matrix is not supported for quantized asymmetric types.
+     * @param[in] output The output tensor. Should be a 2D Tensor. Data types supported: Same as @p input
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *biases, const ITensorInfo *output);
 
     // Inherited methods overridden:
     void run(const Window &window, cl::CommandQueue &queue) override;
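The @warning carried into the new validate() means the unsupported QASYMM8 bias-append path can be reported as a bad configuration up front rather than failing later at configure time. A sketch under invented shapes (3x3 kernels, IFM = 16, OFM = 32, so the reshaped 2D matrix is [32, 3*3*16 + 1] once the bias row is appended):

#include "arm_compute/core/CL/kernels/CLWeightsReshapeKernel.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"

using namespace arm_compute;

bool weights_reshape_config_is_valid()
{
    const TensorInfo weights(TensorShape(3U, 3U, 16U, 32U), 1, DataType::F32);
    const TensorInfo biases(TensorShape(32U), 1, DataType::F32);
    // 2D output: [OFM, kernel_x * kernel_y * IFM + 1] = [32, 145]
    const TensorInfo output(TensorShape(32U, 145U), 1, DataType::F32);

    // Expected to pass for F32 weights; per the @warning above, the same call
    // with QASYMM8 weights and an appended bias should come back as an error.
    const Status status = CLWeightsReshapeKernel::validate(&weights, &biases, &output);
    return bool(status);
}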
diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h
index e51c6bbe98..c53ac4c71f 100644
--- a/arm_compute/core/utils/misc/ShapeCalculator.h
+++ b/arm_compute/core/utils/misc/ShapeCalculator.h
@@ -40,6 +40,17 @@ inline TensorShape compute_permutation_output_shape(const ITensorInfo &input, co
     permute(output_shape, perm);
     return output_shape;
 }
+inline TensorShape compute_weights_reshaped_shape(const ITensorInfo &weights, bool has_bias = false)
+{
+    // Calculate output shape
+    TensorShape weights_reshaped{ weights.tensor_shape() };
+    weights_reshaped.collapse(3);
+    const size_t tmp_dim = weights_reshaped[0];
+    weights_reshaped.set(0, weights_reshaped[1]);
+    weights_reshaped.set(1, tmp_dim + (has_bias ? 1 : 0));
+
+    return weights_reshaped;
+}
 inline TensorShape compute_interleaved_shape(const ITensorInfo &a, int mult_interleave4x4_height = 1)
 {
     // The interleaved output matrix will have the following shape: [ a_height * W, ceil(a_width / W) ] where W = 4 * mult_interleave4x4_height
@@ -101,6 +112,15 @@ inline TensorShape compute_im2col_shape(const ITensorInfo &input)
 
     return shape_im2col;
 }
+inline TensorShape compute_col2im_shape(const ITensorInfo &input, std::pair<unsigned int, unsigned int> convolved_dims)
+{
+    TensorShape col2im_shape{ input.tensor_shape() };
+    col2im_shape.set(0, convolved_dims.first);
+    col2im_shape.set(1, convolved_dims.second);
+    col2im_shape.set(2, input.tensor_shape()[0]);
+
+    return col2im_shape;
+}
 inline TensorShape compute_transposed_shape(const ITensorInfo &input)
 {
     TensorShape shape_transposed{ input.tensor_shape() };
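Worked through with the same illustrative numbers, the two new helpers pin down the GEMM geometry: collapse(3) folds the [3, 3, 16, 32] weights into [144, 32], the swap of the first two dimensions plus the optional bias row yields [32, 145], and compute_col2im_shape maps the GEMM result back into a spatial volume. A sketch under those assumptions:

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"

#include <utility>

using namespace arm_compute;
using namespace arm_compute::misc::shape_calculator;

void shape_walkthrough()
{
    // [kernel_x, kernel_y, IFM, OFM] = [3, 3, 16, 32]
    const TensorInfo weights(TensorShape(3U, 3U, 16U, 32U), 1, DataType::F32);

    // [3, 3, 16, 32] -> collapse(3) -> [144, 32] -> swap + bias row -> [32, 145]
    const TensorShape reshaped = compute_weights_reshaped_shape(weights, true);

    // GEMM result: OFM = 32 wide, 12 * 12 = 144 convolved positions high.
    const TensorInfo gemm_out(TensorShape(32U, 144U), 1, DataType::F32);

    // Back to [width, height, OFM] = [12, 12, 32].
    const TensorShape conv_out = compute_col2im_shape(gemm_out, std::make_pair(12U, 12U));

    (void)reshaped;
    (void)conv_out;
}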
diff --git a/arm_compute/runtime/CL/functions/CLGEMM.h b/arm_compute/runtime/CL/functions/CLGEMM.h
index 0f144915d7..2e82457ee2 100644
--- a/arm_compute/runtime/CL/functions/CLGEMM.h
+++ b/arm_compute/runtime/CL/functions/CLGEMM.h
@@ -72,6 +72,20 @@ public:
      *                       in case matrix A and matrix B have been already transformed.
      */
     void configure(const ICLTensor *a, const ICLTensor *b, const ICLTensor *c, ICLTensor *output, float alpha, float beta, const GEMMInfo &gemm_info = GEMMInfo());
+    /** Static function to check if given info will lead to a valid configuration of @ref CLGEMM.
+     *
+     * @param[in]  a         First input tensor (Matrix or Vector A). Data types supported: QS8/QS16/F16/F32
+     * @param[in]  b         Second input tensor (Matrix B). Data type supported: same as @p a.
+     * @param[in]  c         Third input tensor (Matrix C). It can be a nullptr if just the multiplication between @p a and @p b is needed. Data type supported: same as @p a.
+     * @param[out] output    Output tensor. Data type supported: same as @p a
+     * @param[in]  alpha     Weight of the matrix product
+     * @param[in]  beta      Weight of matrix C
+     * @param[in]  gemm_info (Optional) Specifies if the matrix A and/or matrix B have been reshaped and
+     *                       if the reshape of matrix B should happen only for the first run
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *a, const ITensorInfo *b, const ICLTensor *c, const ITensorInfo *output, const float alpha, const float beta, const GEMMInfo &gemm_info = GEMMInfo());
 
     // Inherited methods overridden:
     void run() override;

diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
index 7126688f8b..24029509b8 100644
--- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
+++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h
@@ -58,14 +58,22 @@ public:
     CLConvolutionLayerReshapeWeights(std::shared_ptr<IMemoryManager> memory_manager = nullptr);
     /** Set the input and output tensors.
      *
-     * @param[in]  weights      Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
-     *                          Data type supported: QS8/QASYMM8/QS16/F16/F32.
-     * @param[in]  biases       Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
-     * @param[out] output       Destination tensor. Data types supported: Same as @p weights.
-     * @param[in]  transpose1xW True if the weights are to undergo a 1xW transposition after reshaping (in case of GEMM operation), false otherwise.
-     *                          Data types supported: Same as @p weights.
+     * @param[in]  weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+     *                     Data type supported: QS8/QASYMM8/QS16/F16/F32.
+     * @param[in]  biases  Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
+     * @param[out] output  Destination tensor. Data types supported: Same as @p weights.
      */
-    void configure(const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, bool transpose1xW);
+    void configure(const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output);
+    /** Static function to check if given info will lead to a valid configuration of @ref CLConvolutionLayerReshapeWeights
+     *
+     * @param[in] weights Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM].
+     *                    Data type supported: QS8/QASYMM8/QS16/F16/F32.
+     * @param[in] biases  Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights.
+     * @param[in] output  Destination tensor. Data types supported: Same as @p weights.
+     *
+     * @return a status
+     */
+    static Status validate(const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output);
 
     // Inherited methods overridden:
     void run() override;
@@ -74,7 +82,6 @@ private:
     CLWeightsReshapeKernel   _weights_reshape_kernel;
     CLGEMMTranspose1xWKernel _weights_transposed_kernel;
     CLTensor                 _weights_reshaped;
-    bool                     _transpose1xW;
 };
 
 /** Basic function to compute the convolution layer. This function calls the following OpenCL kernels/functions:
@@ -112,6 +119,22 @@ public:
      *                          tensor has also been transposed with CLGEMMTranspose1xWKernel. Data type supported: Same as @p input.
      */
     void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo());
+    /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMConvolutionLayer.
+     *
+     * @param[in]  input        Source tensor. 3 lower dimensions represent a single input [width, height, IFM],
+     *                          while every optional dimension from 4 and above represent a batch of inputs.
+     *                          Data types supported: QS8/QASYMM8/QS16/F16/F32.
+     * @param[in]  weights      Weights tensor. Weights are 4D tensor with dimensions [kernel_x, kernel_y, IFM, OFM]. Data type supported: Same as @p input.
+     * @param[in]  biases       Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM].
+     *                          Data type supported: Should match @p input data type, except for input of QASYMM8 type where biases should be of S32 type.
+     * @param[out] output       Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs.
+     *                          Data types supported: Same as @p input.
+     * @param[in]  conv_info    Contains padding and stride information described in @ref PadStrideInfo.
+     * @param[in]  weights_info Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. If this is not part of the fully connected layer the weights
+     *                          tensor has also been transposed with CLGEMMTranspose1xWKernel. Data type supported: Same as @p input.
+     */
+    static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info,
+                           const WeightsInfo &weights_info = WeightsInfo());
 
     // Inherited methods overridden:
     void run() override;
@@ -123,16 +146,23 @@ private:
     /** Configures the appropriate matrix multiply routine
      *
      * @param weights Weights tensor. Data type supported: Same as @p input.
      * @param output  Output tensor. Data types supported: Same as @p input,
      *                except for input of QASYMM8 type where output should be of S32 type.
-     * @param is_interleaved_transposed Flag that signals if matrix is interleaved transposed
      */
-    void configure_mm(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output, bool is_interleaved_transposed, bool are_weights_reshaped);
+    void configure_mm(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output);
+    /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMConvolutionLayer matrix multiply routines
+     *
+     * @param[in] input   Input tensor. Data types supported: QS8/QASYMM8/QS16/F16/F32.
+     * @param[in] weights Weights tensor. Data type supported: Same as @p input.
+     * @param[in] output  Output tensor. Data types supported: Same as @p input,
+     *                    except for input of QASYMM8 type where output should be of S32 type.
+     *
+     * @return a status
+     */
+    static Status validate_mm(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output);
 
 private:
     CLMemoryGroup                                       _memory_group;
     CLConvolutionLayerReshapeWeights                    _reshape_weights;
     CLIm2ColKernel                                      _im2col_kernel;
-    CLGEMMInterleave4x4Kernel                           _interleave_kernel;
-    CLGEMMMatrixMultiplyKernel                          _mm_kernel;
     CLGEMM                                              _mm_gemm;
     CLGEMMLowpMatrixMultiplyCore                        _mm_gemmlowp;
     CLGEMMLowpQuantizeDownInt32ToUint8ScaleByFixedPoint _gemmlowp_output_stage;
@@ -145,9 +175,7 @@
     CLTensor _gemm_output;
     CLTensor _tmp_output;
 
-    bool _are_weights_reshaped;
     bool _is_quantized;
-    bool _is_interleaved_transposed;
 };
 }
 #endif /* __ARM_COMPUTE_CLGEMMCONVOLUTIONLAYER_H__ */

diff --git a/arm_compute/runtime/NEON/functions/NEGEMM.h b/arm_compute/runtime/NEON/functions/NEGEMM.h
index 4b0614badc..f2b6ef77bd 100644
--- a/arm_compute/runtime/NEON/functions/NEGEMM.h
+++ b/arm_compute/runtime/NEON/functions/NEGEMM.h
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2017 ARM Limited.
+ * Copyright (c) 2017-2018 ARM Limited.
  *
  * SPDX-License-Identifier: MIT
  *
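Taken together with the CLGEMMConvolutionLayer changes above (the transpose1xW flag and the dedicated interleave and matrix-multiply kernel members are gone, with CLGEMM and CLGEMMLowpMatrixMultiplyCore now owning that logic), the function-level validate() lets a caller vet the whole convolution before creating any CLTensor. A final sketch with invented shapes: a 14x14x16 input convolved with the 3x3x16x32 weights at stride 1 and no padding gives the 12x12x32 output used in the earlier examples.

#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h"

using namespace arm_compute;

bool conv_config_is_valid()
{
    const TensorInfo src(TensorShape(14U, 14U, 16U), 1, DataType::F32);
    const TensorInfo weights(TensorShape(3U, 3U, 16U, 32U), 1, DataType::F32);
    const TensorInfo biases(TensorShape(32U), 1, DataType::F32);
    const TensorInfo dst(TensorShape(12U, 12U, 32U), 1, DataType::F32);

    // stride_x = stride_y = 1, pad_x = pad_y = 0
    const Status status = CLGEMMConvolutionLayer::validate(&src, &weights, &biases, &dst,
                                                           PadStrideInfo(1, 1, 0, 0));
    return bool(status); // only configure() live CLTensors once this holds
}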