diff options
Diffstat (limited to 'arm_compute')
-rw-r--r-- | arm_compute/core/TensorShape.h | 14
-rw-r--r-- | arm_compute/core/utils/misc/ShapeCalculator.h | 1
-rw-r--r-- | arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h | 27
3 files changed, 31 insertions, 11 deletions
diff --git a/arm_compute/core/TensorShape.h b/arm_compute/core/TensorShape.h index 0c3d9414e1..0340e1a644 100644 --- a/arm_compute/core/TensorShape.h +++ b/arm_compute/core/TensorShape.h @@ -136,6 +136,20 @@ public: // Make sure all empty dimensions are filled with 1 std::fill(_id.begin() + _num_dimensions, _id.end(), 1); } + /** Shifts right the tensor shape increasing its dimensions + * + * @param[in] step Rotation step + */ + void shift_right(size_t step) + { + ARM_COMPUTE_ERROR_ON(step > TensorShape::num_max_dimensions - num_dimensions()); + + std::rotate(begin(), begin() + TensorShape::num_max_dimensions - step, end()); + _num_dimensions += step; + + // Correct number dimensions to ignore trailing dimensions of size 1 + apply_dimension_correction(); + } /** Return a copy with collapsed dimensions starting from a given point. * diff --git a/arm_compute/core/utils/misc/ShapeCalculator.h b/arm_compute/core/utils/misc/ShapeCalculator.h index f64cf9d6ae..115cbe688d 100644 --- a/arm_compute/core/utils/misc/ShapeCalculator.h +++ b/arm_compute/core/utils/misc/ShapeCalculator.h @@ -110,6 +110,7 @@ inline TensorShape compute_reductionB_shape(const ITensorInfo &a) inline TensorShape compute_col2im_shape(const ITensorInfo &input, std::pair<unsigned int, unsigned int> convolved_dims) { TensorShape col2im_shape{ input.tensor_shape() }; + col2im_shape.shift_right(1); col2im_shape.set(0, convolved_dims.first); col2im_shape.set(1, convolved_dims.second); col2im_shape.set(2, input.tensor_shape()[0]); diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h index 3dde52989b..2c1f7a9d5e 100644 --- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h @@ -158,22 +158,24 @@ public: private: /** Configures the appropriate matrix multiply routine * - * @param input Input tensor. Data types supported: QS8/QASYMM8/QS16/F16/F32. 
- * @param weights Weights tensor. Data type supported: Same as @p input. - * @param output Output tensor. Data types supported: Same as @p input, - * except for input of QASYMM8 type where output should be of S32 type. + * @param[in] input Input tensor. Data types supported: QS8/QASYMM8/QS16/F16/F32. + * @param[in] weights Weights tensor. Data type supported: Same as @p input. + * @param[in, out] output Output tensor. Data types supported: Same as @p input, + * except for input of QASYMM8 type where output should be of S32 type. + * @param[in] gemm_3d_depth (Optional) Depth of GEMM 3D (Defaults to 1) */ - void configure_mm(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output); + void configure_mm(const ICLTensor *input, const ICLTensor *weights, ICLTensor *output, int gemm_3d_depth = 1); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMConvolutionLayer matrix multiply routines * - * @param[in] input Input tensor. Data types supported: QS8/QASYMM8/QS16/F16/F32. - * @param[in] weights Weights tensor. Data type supported: Same as @p input. - * @param[in] output Output tensor. Data types supported: Same as @p input, - * except for input of QASYMM8 type where output should be of S32 type. + * @param[in] input Input tensor. Data types supported: QS8/QASYMM8/QS16/F16/F32. + * @param[in] weights Weights tensor. Data type supported: Same as @p input. + * @param[in] output Output tensor. Data types supported: Same as @p input, + * except for input of QASYMM8 type where output should be of S32 type. 
+ * @param[in] gemm_3d_depth (Optional) Depth of GEMM 3D (Defaults to 1) * * @return a status */ - static Status validate_mm(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output); + static Status validate_mm(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *output, int gemm_3d_depth = 1); private: CLMemoryGroup _memory_group; @@ -192,9 +194,12 @@ private: CLTensor _gemm_output; CLTensor _tmp_output; + DataLayout _data_layout; + + bool _skip_im2col; bool _is_quantized; bool _is_activationlayer_enabled; bool _is_prepared; }; -} +} // namespace arm_compute #endif /* __ARM_COMPUTE_CLGEMMCONVOLUTIONLAYER_H__ */ |