From 7da29b6b12ff319ed2b6e2c46588dfa1991556fb Mon Sep 17 00:00:00 2001 From: Alex Gilday Date: Fri, 23 Mar 2018 14:16:00 +0000 Subject: COMPMID-1017: Implement dilated convolution in NEON, OpenCL, and GC Change-Id: If4626ec9e215e14dffe22e80812da5bac84a52e2 Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/125734 Reviewed-by: Anthony Barbier Tested-by: Jenkins --- arm_compute/core/CL/kernels/CLIm2ColKernel.h | 6 +- .../core/GLES_COMPUTE/kernels/GCIm2ColKernel.h | 6 +- arm_compute/core/NEON/kernels/NEIm2ColKernel.h | 7 +- arm_compute/core/Size2D.h | 18 ++ arm_compute/core/Utils.h | 4 +- .../runtime/CL/functions/CLConvolutionLayer.h | 10 +- .../runtime/CL/functions/CLGEMMConvolutionLayer.h | 7 +- .../GLES_COMPUTE/functions/GCConvolutionLayer.h | 4 +- .../runtime/NEON/functions/NEConvolutionLayer.h | 10 +- .../NEON/functions/NEGEMMConvolutionLayer.h | 7 +- src/core/CL/cl_kernels/im2col.cl | 7 +- src/core/CL/kernels/CLIm2ColKernel.cpp | 136 +++++----- .../GLES_COMPUTE/cs_shaders/convolution_layer.cs | 5 +- src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp | 12 +- src/core/NEON/kernels/NEIm2ColKernel.cpp | 45 ++-- src/core/Utils.cpp | 11 +- src/runtime/CL/functions/CLConvolutionLayer.cpp | 18 +- .../CL/functions/CLGEMMConvolutionLayer.cpp | 16 +- .../GLES_COMPUTE/functions/GCConvolutionLayer.cpp | 7 +- src/runtime/NEON/functions/NEConvolutionLayer.cpp | 19 +- .../NEON/functions/NEGEMMConvolutionLayer.cpp | 17 +- tests/benchmark/CL/DirectConvolutionLayer.cpp | 4 +- .../GLES_COMPUTE/DirectConvolutionLayer.cpp | 4 +- tests/benchmark/NEON/ConvolutionLayer.cpp | 3 +- tests/benchmark/NEON/DirectConvolutionLayer.cpp | 4 +- tests/benchmark/fixtures/ConvolutionLayerFixture.h | 4 +- .../fixtures/DirectConvolutionLayerFixture.h | 101 +++++++ tests/benchmark/fixtures/WinogradLayerFixture.h | 100 +++++++ tests/datasets/ConvolutionLayerDataset.h | 25 +- tests/datasets/DilatedConvolutionLayerDataset.h | 81 ++++++ tests/validation/CL/ConvolutionLayer.cpp | 4 +- 
tests/validation/CL/DilatedConvolutionLayer.cpp | 289 +++++++++++++++++++++ tests/validation/CL/LocallyConnected.cpp | 4 +- tests/validation/GLES_COMPUTE/ConvolutionLayer.cpp | 4 +- tests/validation/NEON/ConvolutionLayer.cpp | 4 +- tests/validation/NEON/DilatedConvolutionLayer.cpp | 274 +++++++++++++++++++ tests/validation/NEON/LocallyConnected.cpp | 4 +- .../validation/fixtures/ConvolutionLayerFixture.h | 32 ++- .../fixtures/DirectConvolutionLayerFixture.h | 14 +- .../DirectConvolutionLayerTensorShiftFixture.h | 14 +- .../validation/fixtures/LocallyConnectedFixture.h | 5 +- tests/validation/fixtures/WinogradLayerFixture.h | 4 +- tests/validation/reference/Convolution3d.h | 18 +- tests/validation/reference/ConvolutionLayer.cpp | 21 +- tests/validation/reference/ConvolutionLayer.h | 5 +- 45 files changed, 1172 insertions(+), 222 deletions(-) create mode 100644 tests/benchmark/fixtures/DirectConvolutionLayerFixture.h create mode 100644 tests/benchmark/fixtures/WinogradLayerFixture.h create mode 100644 tests/datasets/DilatedConvolutionLayerDataset.h create mode 100644 tests/validation/CL/DilatedConvolutionLayer.cpp create mode 100644 tests/validation/NEON/DilatedConvolutionLayer.cpp diff --git a/arm_compute/core/CL/kernels/CLIm2ColKernel.h b/arm_compute/core/CL/kernels/CLIm2ColKernel.h index 1ad302eedb..43812e42a3 100644 --- a/arm_compute/core/CL/kernels/CLIm2ColKernel.h +++ b/arm_compute/core/CL/kernels/CLIm2ColKernel.h @@ -75,8 +75,9 @@ public: * @param[in] kernel_dims The kernel dimensions (width and height). * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] has_bias In case biases are provided expands the matrix with 1. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 
*/ - void configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias); + void configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U)); /** Static function to check if given info will lead to a valid configuration of @ref CLIm2ColKernel * * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], @@ -86,10 +87,11 @@ public: * @param[in] kernel_dims The kernel dimensions (width and height). * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] has_bias In case biases are provided expands the matrix with 1. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias); + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U)); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h b/arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h index c376a3d17b..c2d763fd46 100644 --- a/arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h +++ b/arm_compute/core/GLES_COMPUTE/kernels/GCIm2ColKernel.h @@ -76,8 +76,9 @@ public: * @param[in] kernel_dims The kernel dimensions (width and height). * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] has_bias In case biases are provided expands the matrix with 1. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. 
Defaults to (1, 1). */ - void configure(const IGCTensor *input, IGCTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias); + void configure(const IGCTensor *input, IGCTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U)); // Inherited methods overridden: void run(const Window &window) override; @@ -91,10 +92,11 @@ public: * @param[in] kernel_dims The kernel dimensions (width and height). * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] has_bias In case biases are provided expands the matrix with 1. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). * * @return a status */ - static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias); + static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation = Size2D(1U, 1U)); private: /** Run the reshape kernel optimised for the special case (stride is 1, padding is 0 and kernel's low 3 dimensions are same as input) diff --git a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h index 1659b725bb..ecfce2436d 100644 --- a/arm_compute/core/NEON/kernels/NEIm2ColKernel.h +++ b/arm_compute/core/NEON/kernels/NEIm2ColKernel.h @@ -85,9 +85,10 @@ public: * @param[in] has_bias In case biases are provided expands the matrix with 1. * @param[in] is_fully_connected Determines whether this kernel will be called by @ref NEFullyConnectedLayer in order to validate the arguments * @param[in] is_flatten (Optional) Determines whether this kernel will be called by @ref NEFlattenLayer in order to validate the arguments + * @param[in] dilation (Optional) Dilation, in elements, across x and y. 
Defaults to (1, 1). */ void configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, - bool has_bias, bool is_fully_connected = false, bool is_flatten = false); + bool has_bias, bool is_fully_connected = false, bool is_flatten = false, const Size2D &dilation = Size2D(1U, 1U)); /** Static function to check if given info will lead to a valid configuration of @ref NEIm2ColKernel * * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM], @@ -99,11 +100,12 @@ public: * @param[in] has_bias In case biases are provided expands the matrix with 1. * @param[in] is_fully_connected Determines whether this kernel will be called by @ref NEFullyConnectedLayer in order to validate the arguments * @param[in] is_flatten (Optional) Determines whether this kernel will be called by @ref NEFlattenLayer in order to validate the arguments + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). * * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, - bool has_bias, bool is_fully_connected, bool is_flatten = false); + bool has_bias, bool is_fully_connected, bool is_flatten = false, const Size2D &dilation = Size2D(1U, 1U)); // Inherited methods overridden: void run(const Window &window, const ThreadInfo &info) override; @@ -135,6 +137,7 @@ private: unsigned int _kernel_width; unsigned int _kernel_height; bool _has_bias; + Size2D _dilation; }; } // namespace arm_compute #endif /*__ARM_COMPUTE_NEIM2COLKERNEL_H__ */ diff --git a/arm_compute/core/Size2D.h b/arm_compute/core/Size2D.h index 37c4ebd041..ff7466d56f 100644 --- a/arm_compute/core/Size2D.h +++ b/arm_compute/core/Size2D.h @@ -70,6 +70,24 @@ public: return support::cpp11::to_string(width) + std::string("x") + support::cpp11::to_string(height); } + /** Semantic accessor for width as x. 
+ * + * @return x. + */ + size_t x() const + { + return width; + } + + /** Semantic accessor for height as y. + * + * @return y. + */ + size_t y() const + { + return height; + } + public: size_t width = {}; /**< Width of the image region or rectangle */ size_t height = {}; /**< Height of the image region or rectangle */ diff --git a/arm_compute/core/Utils.h b/arm_compute/core/Utils.h index f1e357293f..23668e0169 100644 --- a/arm_compute/core/Utils.h +++ b/arm_compute/core/Utils.h @@ -893,12 +893,14 @@ const std::pair deconvolution_output_dimensions(unsi * @param[in] kernel_width Kernel width. * @param[in] kernel_height Kernel height. * @param[in] pad_stride_info Pad and stride information. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). * * @return A pair with the new width in the first position and the new height in the second. */ const std::pair scaled_dimensions(unsigned int width, unsigned int height, unsigned int kernel_width, unsigned int kernel_height, - const PadStrideInfo &pad_stride_info); + const PadStrideInfo &pad_stride_info, + const Size2D &dilation = Size2D(1U, 1U)); /** Convert a tensor format into a string. * diff --git a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h index 53d59c3176..5f383b28ab 100644 --- a/arm_compute/runtime/CL/functions/CLConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLConvolutionLayer.h @@ -56,8 +56,10 @@ public: * Data types supported: Same as @p input. * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] weights_info Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. Data type supported: Same as @p input. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 
*/ - void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo()); + void configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(), + const Size2D &dilation = Size2D(1U, 1U)); /** Static function to check if given info will lead to a valid configuration of @ref CLConvolutionLayer * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], @@ -69,11 +71,12 @@ public: * Data types supported: Same as @p input. * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] weights_info Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. Data type supported: Same as @p input. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). * * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info = WeightsInfo()); + const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U)); /** Static function to check if given info will return the convolution called by @ref CLConvolutionLayer * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], @@ -86,11 +89,12 @@ public: * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] weights_info Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. Data type supported: Same as @p input. * @param[in] gpu_target Specifies the @p GPUTarget. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 
* * @return a status */ static ConvolutionMethod get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info, const GPUTarget gpu_target); + const WeightsInfo &weights_info, const GPUTarget gpu_target, const Size2D &dilation = Size2D(1U, 1U)); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h index 0548ce7be7..dc1211a51a 100644 --- a/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h @@ -116,8 +116,10 @@ public: * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] weights_info Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. If this is not part of the fully connected layer the weights * tensor has also been transposed with CLGEMMTranspose1xWKernel. Data type supported: Same as @p input. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). */ - void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo()); + void configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(), + const Size2D &dilation = Size2D(1U, 1U)); /** Static function to check if given info will lead to a valid configuration of @ref CLGEMMConvolutionLayer. * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], @@ -131,11 +133,12 @@ public: * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. 
* @param[in] weights_info Specifies if the weights tensor has been reshaped with CLWeightsReshapeKernel. If this is not part of the fully connected layer the weights * tensor has also been transposed with CLGEMMTranspose1xWKernel. Data type supported: Same as @p input. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). * * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info = WeightsInfo()); + const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U)); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h b/arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h index 2bac982d0c..198fa7ba94 100644 --- a/arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h +++ b/arm_compute/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.h @@ -99,8 +99,10 @@ public: * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] weights_info Specifies if the weights tensor has been reshaped with GCWeightsReshapeKernel. If this is not part of the fully connected layer the weights * tensor has also been transposed with GCGEMMTranspose1xWKernel. Data type supported: Same as @p input. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 
*/ - void configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo()); + void configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(), + const Size2D &dilation = Size2D(1U, 1U)); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h index 6ab1350b25..3e6e5abd28 100644 --- a/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEConvolutionLayer.h @@ -61,8 +61,10 @@ public: * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). */ - void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo()); + void configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(), + const Size2D &dilation = Size2D(1U, 1U)); /** Static function to check if given info will lead to a valid configuration of @ref NEConvolutionLayer * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], @@ -76,11 +78,12 @@ public: * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. 
* @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). * * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info = WeightsInfo()); + const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U)); /** Static function to check if given info will return the convolution called by @ref NEConvolutionLayer * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], @@ -94,11 +97,12 @@ public: * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). 
* * @return the Convolution Method Hint */ static ConvolutionMethod get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info = WeightsInfo()); + const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U)); // Inherited methods overridden: void run() override; diff --git a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h index 4ae8ee1fb3..43e9304414 100644 --- a/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h +++ b/arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h @@ -115,8 +115,10 @@ public: * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. * @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). */ - void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo()); + void configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info = WeightsInfo(), + const Size2D &dilation = Size2D(1U, 1U)); /** Static function to check if given info will lead to a valid configuration of @ref NEGEMMConvolutionLayer * * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], @@ -130,11 +132,12 @@ public: * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. 
* @param[in] weights_info Specifies if the weights tensor has been reshaped with NEWeightsReshapeKernel. If this is not part of the fully connected layer the weights * tensor has also been transposed with NEGEMMTranspose1xWKernel. Data type supported: Same as @p input. + * @param[in] dilation (Optional) Dilation, in elements, across x and y. Defaults to (1, 1). * * @return a status */ static Status validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info = WeightsInfo()); + const WeightsInfo &weights_info = WeightsInfo(), const Size2D &dilation = Size2D(1U, 1U)); // Inherited methods overridden: void run() override; diff --git a/src/core/CL/cl_kernels/im2col.cl b/src/core/CL/cl_kernels/im2col.cl index 75d99bda85..1e85e1b303 100644 --- a/src/core/CL/cl_kernels/im2col.cl +++ b/src/core/CL/cl_kernels/im2col.cl @@ -680,6 +680,7 @@ __kernel void im2col_generic_padx0_pady0_dchw( * @note The pad_left, pad_right, pad_top and pad_bottom must be passed at compile time using -DPAD_LEFT, -DPAD_RIGHT, -DPAD_TOP and -DPAD_BOTTOM: e.g. -DPAD_LEFT=1, -DPAD_RIGHT=2, -DPAD_TOP=3 and -DPAD_BOTTOM=2 * @note The zero value to store in case we load values out-of-bounds must be passed at compile time using -DPAD_VALUE: e.g. -DPAD_VALUE=0.0 * @note The stride along the X and Y directions must be passed at compile time using -DSTRIDE_X and -DSTRIDE_Y: e.g. -DSTRIDE_X=1 and -DSTRIDE_Y=1 + * @note The dilation_x and dilation_y must be passed at compile time using -DDILATION_X and -DDILATION_Y: e.g. -DDILATION_X=1, -DDILATION_Y=1 * @note In case biases will be added to the convolution -DHAS_BIAS has to be passed to append the final matrix with 1 in each row. * * @param[in] src_ptr Pointer to the source tensor. 
Supported data types: QS8/QASYMM8/QS16/F16/F32 @@ -722,10 +723,12 @@ __kernel void im2col_generic_dchw( __global DATA_TYPE *output_ptr = ((__global DATA_TYPE *)(dst_ptr + dst_offset_first_element_in_bytes + yo * dst_stride_y + batch * dst_stride_w)) + xo; // Linearize convolution elements - for(int y = yi, y_e = yi + KERNEL_HEIGHT; y < y_e; ++y) + for(int yk = 0; yk < KERNEL_HEIGHT; ++yk) { - for(int x = xi, x_e = xi + KERNEL_WIDTH; x < x_e; ++x, ++output_ptr) + int y = yi + yk * DILATION_Y; + for(int xk = 0; xk < KERNEL_WIDTH; ++xk, ++output_ptr) { + int x = xi + xk * DILATION_X; #if PAD_LEFT == 0 && PAD_TOP == 0 && PAD_RIGHT == 0 && PAD_BOTTOM == 0 *output_ptr = *((__global DATA_TYPE *)(input_ptr + x * src_stride_x + y * src_stride_y)); #else // PAD_LEFT == 0 && PAD_TOP == 0 && PAD_RIGHT == 0 && PAD_BOTTOM == 0 diff --git a/src/core/CL/kernels/CLIm2ColKernel.cpp b/src/core/CL/kernels/CLIm2ColKernel.cpp index 9bc4787384..cc19d3c263 100644 --- a/src/core/CL/kernels/CLIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLIm2ColKernel.cpp @@ -41,11 +41,12 @@ using namespace arm_compute; namespace { -Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, bool has_bias) +Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, bool has_bias, const Size2D &dilation) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::QASYMM8 && has_bias); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(output); + ARM_COMPUTE_RETURN_ERROR_ON((dilation.x() < 1) || (dilation.y() < 1)); // Checks performed when output is configured if(output->total_size() != 0) @@ -63,12 +64,12 @@ CLIm2ColKernel::CLIm2ColKernel() { } -void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias) +void CLIm2ColKernel::configure(const ICLTensor 
*input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); // Perform validation step - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), has_bias)); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), has_bias, dilation)); _input = input; _output = output; @@ -107,7 +108,7 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const _convolved_dims = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), kernel_dims.width, kernel_dims.height, - conv_info); + conv_info, dilation); build_opts.add_option("-DKERNEL_WIDTH=" + support::cpp11::to_string(kernel_dims.width)); build_opts.add_option("-DKERNEL_HEIGHT=" + support::cpp11::to_string(kernel_dims.height)); @@ -122,77 +123,82 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const build_opts.add_option("-DPAD_BOTTOM=" + support::cpp11::to_string(conv_info.pad_bottom())); build_opts.add_option("-DSRC_WIDTH=" + support::cpp11::to_string(input->info()->dimension(0))); build_opts.add_option("-DSRC_HEIGHT=" + support::cpp11::to_string(input->info()->dimension(1))); + build_opts.add_option("-DDILATION_X=" + support::cpp11::to_string(dilation.x())); + build_opts.add_option("-DDILATION_Y=" + support::cpp11::to_string(dilation.y())); build_opts.add_option_if_else(is_data_type_quantized(data_type), "-DPAD_VALUE=" + support::cpp11::to_string(input->info()->quantization_info().offset), "-DPAD_VALUE=0"); const bool squared_im2col = kernel_dims.width == kernel_dims.height; - if(squared_im2col && !is_data_type_fixed_point(data_type)) + if(dilation == Size2D(1U, 1U)) { - // Check if we can run an optimized im2col - switch(kernel_dims.width) + if(squared_im2col && !is_data_type_fixed_point(data_type)) { - case 1: - // Optimized im2col1x1 if stride_x = 1 and conv_info.has_padding() = false - 
if(conv_info.stride().first == 1 && !conv_info.has_padding()) - { - // Set hint for LWS + // Check if we can run an optimized im2col + switch(kernel_dims.width) + { + case 1: + // Optimized im2col1x1 if stride_x = 1 and conv_info.has_padding() = false + if(conv_info.stride().first == 1 && !conv_info.has_padding()) + { + // Set hint for LWS + _lws_hint = cl::NDRange(1, 1, 8); + _num_elems_processed_per_iteration = 4; + is_optimized_path = true; + kernel_name = "im2col1x1_stridex1_dchw"; + } + break; + case 3: _lws_hint = cl::NDRange(1, 1, 8); - _num_elems_processed_per_iteration = 4; + _num_elems_processed_per_iteration = 1; is_optimized_path = true; - kernel_name = "im2col1x1_stridex1_dchw"; - } - break; - case 3: - _lws_hint = cl::NDRange(1, 1, 8); - _num_elems_processed_per_iteration = 1; - is_optimized_path = true; - kernel_name = "im2col3x3_dchw"; - break; - case 5: - _num_elems_processed_per_iteration = 1; - is_optimized_path = true; - kernel_name = "im2col5x5_dchw"; - break; - case 11: - // Optimized im2col11x11 if pad_x = pad_y = 0 - if(!conv_info.has_padding()) - { + kernel_name = "im2col3x3_dchw"; + break; + case 5: _num_elems_processed_per_iteration = 1; is_optimized_path = true; - kernel_name = "im2col11x11_padx0_pady0_dchw"; - } - break; - default: - is_optimized_path = false; - break; - } - } - else if(kernel_dims.width > 1 && !conv_info.has_padding()) - { - _num_elems_processed_per_iteration = 1; - kernel_name = "im2col_generic_padx0_pady0_dchw"; - - // Optimized im2col is performed using one or more vector operations with the specified vector size - // and a remainder. For example, for 5x5 convolutions, im2col is performed using vectors of size 4 - // and scalars; for 7x7 convolutions, using vectors of size 4 and vectors of size 3. - // Using the vector size of 4 is always safe since OpenCL supports vectors of size 2 and 3. - // Using the vector size of 8, however, may be faster. 
- size_t vector_size = 4; - // For 2x2 convolutions, use vectors of size 2. (For 3x3 convolutions, im2col_kernel3x3_padx0_pady0 - // is used instead.) - if(kernel_dims.width < vector_size) - { - vector_size = kernel_dims.width; + kernel_name = "im2col5x5_dchw"; + break; + case 11: + // Optimized im2col11x11 if pad_x = pad_y = 0 + if(!conv_info.has_padding()) + { + _num_elems_processed_per_iteration = 1; + is_optimized_path = true; + kernel_name = "im2col11x11_padx0_pady0_dchw"; + } + break; + default: + is_optimized_path = false; + break; + } } - // Local work size and vector size optimized for the 11x11 AlexNet convolution on Bifrost. - if(gpu_target_is_in(gpu_target, GPUTarget::G71, GPUTarget::G72) && kernel_dims.width == 11) + else if(kernel_dims.width > 1 && !conv_info.has_padding()) { - _lws_hint = cl::NDRange(1, 1, 1); - vector_size = 8; + _num_elems_processed_per_iteration = 1; + kernel_name = "im2col_generic_padx0_pady0_dchw"; + + // Optimized im2col is performed using one or more vector operations with the specified vector size + // and a remainder. For example, for 5x5 convolutions, im2col is performed using vectors of size 4 + // and scalars; for 7x7 convolutions, using vectors of size 4 and vectors of size 3. + // Using the vector size of 4 is always safe since OpenCL supports vectors of size 2 and 3. + // Using the vector size of 8, however, may be faster. + size_t vector_size = 4; + // For 2x2 convolutions, use vectors of size 2. (For 3x3 convolutions, im2col_kernel3x3_padx0_pady0 + // is used instead.) + if(kernel_dims.width < vector_size) + { + vector_size = kernel_dims.width; + } + // Local work size and vector size optimized for the 11x11 AlexNet convolution on Bifrost. 
+ if(gpu_target_is_in(gpu_target, GPUTarget::G71, GPUTarget::G72) && kernel_dims.width == 11) + { + _lws_hint = cl::NDRange(1, 1, 1); + vector_size = 8; + } + const size_t width_mod_vector_size = kernel_dims.width % vector_size; + build_opts.add_option("-DVECTOR_SIZE=" + support::cpp11::to_string(vector_size)); + build_opts.add_option("-DWIDTH_MOD_VECTOR_SIZE=" + support::cpp11::to_string(width_mod_vector_size)); } - const size_t width_mod_vector_size = kernel_dims.width % vector_size; - build_opts.add_option("-DVECTOR_SIZE=" + support::cpp11::to_string(vector_size)); - build_opts.add_option("-DWIDTH_MOD_VECTOR_SIZE=" + support::cpp11::to_string(width_mod_vector_size)); } _run_func = &CLIm2ColKernel::run_generic; } @@ -206,7 +212,7 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const // Create kernel _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); - // Configure kernel window + // Configure kernel window Window win; if(is_optimized_path) { @@ -250,12 +256,12 @@ void CLIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const _config_id += support::cpp11::to_string(output->info()->dimension(1)); } -Status CLIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias) +Status CLIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation) { ARM_COMPUTE_UNUSED(kernel_dims); ARM_COMPUTE_UNUSED(conv_info); ARM_COMPUTE_UNUSED(has_bias); - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, has_bias)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, has_bias, dilation)); return Status{}; } diff --git a/src/core/GLES_COMPUTE/cs_shaders/convolution_layer.cs b/src/core/GLES_COMPUTE/cs_shaders/convolution_layer.cs index 2701f5b262..ad3f14d442 100644 --- 
a/src/core/GLES_COMPUTE/cs_shaders/convolution_layer.cs +++ b/src/core/GLES_COMPUTE/cs_shaders/convolution_layer.cs @@ -164,6 +164,7 @@ void main() * @note STRIDE_X/STRIDE_Y must be passed for stride info, e.g. "#define STRIDE_X xxx" * @note CONVOLVED_WIDTH/CONVOLVED_HEIGHT must be passed for convolved dimension, e.g. "#define CONVOLVED_WIDTH xxx" * @note SRC_WIDTH/SRC_HEIGHT must be passed for input dimension, e.g. "#define SRC_WIDTH xxx" + * @note DILATION_X/DILATION_Y must be passed for dilation sizes, e.g. "#define DILATION_X xxx" * @note In case biases will be added to the convolution "#define HAS_BIAS" has to be passed to append the final matrix with 1 in each row. * * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32 @@ -210,9 +211,9 @@ void main(void) uint src_pos = 0u; // Linearize convolution elements - for(uint y = yi, y_e = yi + uint(KERNEL_HEIGHT); y < y_e; ++y) + for(uint y = yi, y_e = yi + uint(KERNEL_HEIGHT) * uint(DILATION_Y); y < y_e; y += uint(DILATION_Y)) { - for(uint x = xi, x_e = xi + uint(KERNEL_WIDTH); x < x_e; ++x, TENSOR_OFFSET_ADVANCE(dst_iter, 1u)) + for(uint x = xi, x_e = xi + uint(KERNEL_WIDTH) * uint(DILATION_X); x < x_e; x += uint(DILATION_X), TENSOR_OFFSET_ADVANCE(dst_iter, 1u)) { #if PAD_LEFT == 0 && PAD_TOP == 0 && PAD_RIGHT == 0 && PAD_BOTTOM == 0 src_pos = TENSOR_OFFSET_ADVANCE_IN_BYTES(src_iter, x * src_attrs.stride_x + y * src_attrs.stride_y); diff --git a/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp b/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp index 47bfebcc09..eb790471fb 100644 --- a/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp +++ b/src/core/GLES_COMPUTE/kernels/GCIm2ColKernel.cpp @@ -65,7 +65,7 @@ GCIm2ColKernel::GCIm2ColKernel() { } -void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias) +void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, const Size2D &kernel_dims, 
const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); @@ -98,7 +98,8 @@ void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, const && (std::equal(input->info()->tensor_shape().cbegin() + 3, input->info()->tensor_shape().cend(), output->info()->tensor_shape().cbegin() + 1)) - && ((stride_x == 1) && (stride_y == 1) && !conv_info.has_padding()); + && ((stride_x == 1) && (stride_y == 1) && !conv_info.has_padding()) + && (dilation == Size2D(1U, 1U)); std::string kernel_name = "im2col_generic"; if(!run_img2col_reduced) @@ -111,7 +112,7 @@ void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, const build_opts.emplace("#define IM2COL_GENERIC"); _convolved_dims = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), kernel_dims.width, kernel_dims.height, - conv_info); + conv_info, dilation); _num_elems_processed_per_iteration = 2; build_opts.emplace("#define KERNEL_WIDTH " + support::cpp11::to_string(kernel_dims.width)); @@ -127,6 +128,8 @@ void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, const build_opts.emplace("#define PAD_BOTTOM " + support::cpp11::to_string(conv_info.pad_bottom())); build_opts.emplace("#define SRC_WIDTH " + support::cpp11::to_string(input->info()->dimension(0))); build_opts.emplace("#define SRC_HEIGHT " + support::cpp11::to_string(input->info()->dimension(1))); + build_opts.emplace("#define DILATION_X " + support::cpp11::to_string(dilation.x())); + build_opts.emplace("#define DILATION_Y " + support::cpp11::to_string(dilation.y())); _run_func = &GCIm2ColKernel::run_generic; } @@ -206,11 +209,12 @@ void GCIm2ColKernel::configure(const IGCTensor *input, IGCTensor *output, const IGCKernel::configure(win); } -Status GCIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias) +Status GCIm2ColKernel::validate(const 
ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias, const Size2D &dilation) { ARM_COMPUTE_UNUSED(kernel_dims); ARM_COMPUTE_UNUSED(conv_info); ARM_COMPUTE_UNUSED(has_bias); + ARM_COMPUTE_UNUSED(dilation); ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output)); return Status{}; } diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp index dee1608c43..348722c55d 100644 --- a/src/core/NEON/kernels/NEIm2ColKernel.cpp +++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp @@ -45,12 +45,13 @@ using namespace arm_compute; namespace { Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, - bool has_bias, bool is_fully_connected, bool is_flatten) + bool has_bias, bool is_fully_connected, bool is_flatten, const Size2D &dilation) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, DataType::QS16, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_FIXED_POINT_POSITION(input, output); ARM_COMPUTE_RETURN_ERROR_ON(input->data_type() == DataType::QASYMM8 && has_bias); + ARM_COMPUTE_RETURN_ERROR_ON((dilation.x() < 1) || (dilation.y() < 1)); if(is_flatten) /* Called by FlattenLayer */ { @@ -59,7 +60,7 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *output, c } else if(!is_fully_connected) /* Called by ConvolutionLayer */ { - std::pair out_dims = scaled_dimensions(input->dimension(0), input->dimension(1), kernel_dims.width, kernel_dims.height, conv_info); + std::pair out_dims = scaled_dimensions(input->dimension(0), input->dimension(1), kernel_dims.width, kernel_dims.height, conv_info, dilation); ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(0) != (input->dimension(2) * kernel_dims.area() + (has_bias ? 
1 : 0))); ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(1) != (out_dims.first * out_dims.second)); ARM_COMPUTE_RETURN_ERROR_ON(output->dimension(2) != 1); @@ -91,11 +92,13 @@ inline void linearize_volume(const uint8_t *const in_ptr, int input_stride_y, int input_stride_z, int fixed_point_position, - int pad_value) + int pad_value, + int dilation_x, + int dilation_y) { const int kernel_size2 = kernel_width * kernel_height; - const int x_e = top_left_x + kernel_width; - const int y_e = top_left_y + kernel_height; + const int x_e = top_left_x + kernel_width * dilation_x; + const int y_e = top_left_y + kernel_height * dilation_y; // Linearize volume int d = 0; @@ -104,12 +107,12 @@ inline void linearize_volume(const uint8_t *const in_ptr, // 2) to have an optimized im2col for the first convolution layer where usually we have 3 IFMs for(; d <= (kernel_depth - 3); d += 3) { - for(int y = top_left_y; y < y_e; ++y) + for(int y = top_left_y; y < y_e; y += dilation_y) { if((y < 0 || y >= input_h) && has_pads) { // All the values will be the offset (will be zeros when not quantized) - for(int x = top_left_x; x < x_e; ++x, ++out_ptr) + for(int x = top_left_x; x < x_e; x += dilation_x, ++out_ptr) { *(out_ptr + 0 * kernel_size2) = pad_value; *(out_ptr + 1 * kernel_size2) = pad_value; @@ -118,7 +121,7 @@ inline void linearize_volume(const uint8_t *const in_ptr, } else { - for(int x = top_left_x; x < x_e; ++x, ++out_ptr) + for(int x = top_left_x; x < x_e; x += dilation_x, ++out_ptr) { if((x < 0 || x >= input_w) && has_pads) { @@ -141,7 +144,7 @@ inline void linearize_volume(const uint8_t *const in_ptr, // Left over for(; d < kernel_depth; d++) { - for(int y = top_left_y; y < y_e; ++y) + for(int y = top_left_y; y < y_e; y += dilation_y) { if((y < 0 || y >= input_h) && has_pads) { @@ -151,7 +154,7 @@ inline void linearize_volume(const uint8_t *const in_ptr, } else { - for(int x = top_left_x; x < x_e; ++x, ++out_ptr) + for(int x = top_left_x; x < x_e; x += dilation_x, ++out_ptr) { 
if((x < 0 || x >= input_w) && has_pads) { @@ -251,7 +254,9 @@ void NEIm2ColKernel::run_generic(const Window &window) input_stride_y, input_stride_z, _input->info()->fixed_point_position(), - offset); + offset, + _dilation.x(), + _dilation.y()); }, in, out); } @@ -309,27 +314,28 @@ void NEIm2ColKernel::run_reduced(const Window &window) } NEIm2ColKernel::NEIm2ColKernel() - : _func(), _input(nullptr), _output(nullptr), _convolved_dims(), _conv_info(), _kernel_width(0), _kernel_height(0), _has_bias(false) + : _func(), _input(nullptr), _output(nullptr), _convolved_dims(), _conv_info(), _kernel_width(0), _kernel_height(0), _has_bias(false), _dilation(1U, 1U) { } void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, - bool has_bias, bool is_fully_connected, bool is_flatten) + bool has_bias, bool is_fully_connected, bool is_flatten, const Size2D &dilation) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, output); // Perform validation step ARM_COMPUTE_UNUSED(is_fully_connected, is_flatten); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), kernel_dims, conv_info, has_bias, is_fully_connected, is_flatten)); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(input->info(), output->info(), kernel_dims, conv_info, has_bias, is_fully_connected, is_flatten, dilation)); _input = input; _output = output; _conv_info = conv_info; _kernel_width = kernel_dims.width; - _kernel_height = kernel_dims.height, + _kernel_height = kernel_dims.height; + _dilation = dilation; _convolved_dims = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), _kernel_width, _kernel_height, - _conv_info); + _conv_info, _dilation); _has_bias = has_bias; unsigned int stride_x = 0; @@ -340,7 +346,8 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size && (std::equal(input->info()->tensor_shape().cbegin() + 3, input->info()->tensor_shape().cend(), 
output->info()->tensor_shape().cbegin() + 1)) - && ((stride_x == 1) && (stride_y == 1) && !conv_info.has_padding()); + && ((stride_x == 1) && (stride_y == 1) && !conv_info.has_padding()) + && ((dilation.x() == 1) && (dilation.y() == 1)); Window window = calculate_max_window(*input->info(), Steps()); @@ -407,9 +414,9 @@ void NEIm2ColKernel::configure(const ITensor *input, ITensor *output, const Size } Status NEIm2ColKernel::validate(const ITensorInfo *input, const ITensorInfo *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, - bool has_bias, bool is_fully_connected, bool is_flatten) + bool has_bias, bool is_fully_connected, bool is_flatten, const Size2D &dilation) { - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, kernel_dims, conv_info, has_bias, is_fully_connected, is_flatten)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(input, output, kernel_dims, conv_info, has_bias, is_fully_connected, is_flatten, dilation)); return Status{}; } diff --git a/src/core/Utils.cpp b/src/core/Utils.cpp index f4b45532cf..4a237f9daa 100644 --- a/src/core/Utils.cpp +++ b/src/core/Utils.cpp @@ -292,7 +292,8 @@ const std::pair arm_compute::deconvolution_output_di const std::pair arm_compute::scaled_dimensions(unsigned int width, unsigned int height, unsigned int kernel_width, unsigned int kernel_height, - const PadStrideInfo &pad_stride_info) + const PadStrideInfo &pad_stride_info, + const Size2D &dilation) { const unsigned int pad_left = pad_stride_info.pad_left(); const unsigned int pad_top = pad_stride_info.pad_top(); @@ -305,12 +306,12 @@ const std::pair arm_compute::scaled_dimensions(unsig switch(pad_stride_info.round()) { case DimensionRoundingType::FLOOR: - w = static_cast(std::floor((static_cast(width + pad_left + pad_right - kernel_width) / stride_x) + 1)); - h = static_cast(std::floor((static_cast(height + pad_top + pad_bottom - kernel_height) / stride_y) + 1)); + w = static_cast(std::floor((static_cast(width + pad_left + pad_right - 
(dilation.x() * (kernel_width - 1) + 1)) / stride_x) + 1)); + h = static_cast(std::floor((static_cast(height + pad_top + pad_bottom - (dilation.y() * (kernel_height - 1) + 1)) / stride_y) + 1)); break; case DimensionRoundingType::CEIL: - w = static_cast(std::ceil((static_cast(width + pad_left + pad_right - kernel_width) / stride_x) + 1)); - h = static_cast(std::ceil((static_cast(height + pad_top + pad_bottom - kernel_height) / stride_y) + 1)); + w = static_cast(std::ceil((static_cast(width + pad_left + pad_right - (dilation.x() * (kernel_width - 1) + 1)) / stride_x) + 1)); + h = static_cast(std::ceil((static_cast(height + pad_top + pad_bottom - (dilation.y() * (kernel_height - 1) + 1)) / stride_y) + 1)); break; default: ARM_COMPUTE_ERROR("Unsupported rounding type"); diff --git a/src/runtime/CL/functions/CLConvolutionLayer.cpp b/src/runtime/CL/functions/CLConvolutionLayer.cpp index 1a486ce5c7..64bda93ff0 100644 --- a/src/runtime/CL/functions/CLConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLConvolutionLayer.cpp @@ -42,13 +42,14 @@ CLConvolutionLayer::CLConvolutionLayer(std::shared_ptr memory_ma { } -void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info) +void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, + const Size2D &dilation) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info)); + ARM_COMPUTE_ERROR_THROW_ON(CLConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? 
biases->info() : nullptr), output->info(), conv_info, weights_info, dilation)); switch(CLConvolutionLayer::get_convolution_method(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, - weights_info, CLScheduler::get().target())) + weights_info, CLScheduler::get().target(), dilation)) { case ConvolutionMethod::DIRECT: { @@ -60,7 +61,7 @@ void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, c case ConvolutionMethod::GEMM: { auto f = arm_compute::support::cpp14::make_unique(_memory_manager); - f->configure(input, weights, biases, output, conv_info, weights_info); + f->configure(input, weights, biases, output, conv_info, weights_info, dilation); _function = std::move(f); break; } @@ -71,14 +72,14 @@ void CLConvolutionLayer::configure(ICLTensor *input, const ICLTensor *weights, c } Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info) + const WeightsInfo &weights_info, const Size2D &dilation) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); //Configure if the parameters match the direct convolution or the gemm-based const GPUTarget gpu_target = CLScheduler::get().target(); - switch(CLConvolutionLayer::get_convolution_method(input, weights, biases, output, conv_info, weights_info, gpu_target)) + switch(CLConvolutionLayer::get_convolution_method(input, weights, biases, output, conv_info, weights_info, gpu_target, dilation)) { case ConvolutionMethod::DIRECT: { @@ -89,7 +90,7 @@ Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo case ConvolutionMethod::GEMM: { // Validate gemm-based convolution layer - CLGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info); + CLGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info, dilation); 
break; } default: @@ -101,7 +102,7 @@ Status CLConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo } ConvolutionMethod CLConvolutionLayer::get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info, const GPUTarget gpu_target) + const WeightsInfo &weights_info, const GPUTarget gpu_target, const Size2D &dilation) { ARM_COMPUTE_UNUSED(input); ARM_COMPUTE_UNUSED(weights); @@ -110,6 +111,7 @@ ConvolutionMethod CLConvolutionLayer::get_convolution_method(const ITensorInfo * ARM_COMPUTE_UNUSED(conv_info); ARM_COMPUTE_UNUSED(weights_info); ARM_COMPUTE_UNUSED(gpu_target); + ARM_COMPUTE_UNUSED(dilation); return ConvolutionMethod::GEMM; } diff --git a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp index bc339f176f..e7ad62f5ff 100644 --- a/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLGEMMConvolutionLayer.cpp @@ -151,7 +151,8 @@ Status CLGEMMConvolutionLayer::validate_mm(const ITensorInfo *input, const ITens return Status{}; } -void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info) +void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor *weights, const ICLTensor *biases, ICLTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, + const Size2D &dilation) { ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); @@ -160,7 +161,8 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * biases != nullptr ? 
biases->info() : nullptr, output->info(), conv_info, - weights_info)); + weights_info, + dilation)); _is_quantized = is_data_type_quantized_asymmetric(input->info()->data_type()); @@ -187,7 +189,7 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * const unsigned int kernel_width = weights->info()->dimension(0); const unsigned int kernel_height = weights->info()->dimension(1); std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), kernel_width, kernel_height, - conv_info); + conv_info, dilation); unsigned int mat_weights_cols = weights->info()->dimension(3); unsigned int mat_weights_rows = weights->info()->dimension(0) * weights->info()->dimension(1) * weights->info()->dimension(2) + bias_element; @@ -224,7 +226,7 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * _memory_group.manage(&_gemm_output); // Configure im2col - _im2col_kernel.configure(input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, append_bias); + _im2col_kernel.configure(input, &_im2col_output, Size2D(kernel_width, kernel_height), conv_info, append_bias, dilation); // Configure GEMM configure_mm(&_im2col_output, weights, &_gemm_output); @@ -260,7 +262,7 @@ void CLGEMMConvolutionLayer::configure(const ICLTensor *input, const ICLTensor * } Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info) + const WeightsInfo &weights_info, const Size2D &dilation) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, weights, output); ARM_COMPUTE_RETURN_ERROR_ON_MSG(weights_info.are_reshaped(), "Weights already reshaped are not supported!"); @@ -282,7 +284,7 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI const unsigned int kernel_width = weights->dimension(0); const unsigned int kernel_height = 
weights->dimension(1); - std::tie(conv_w, conv_h) = scaled_dimensions(input->dimension(0), input->dimension(1), kernel_width, kernel_height, conv_info); + std::tie(conv_w, conv_h) = scaled_dimensions(input->dimension(0), input->dimension(1), kernel_width, kernel_height, conv_info, dilation); unsigned int mat_weights_cols = weights->dimension(3); unsigned int mat_weights_rows = weights->dimension(0) * weights->dimension(1) * weights->dimension(2) + bias_element; @@ -298,7 +300,7 @@ Status CLGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI shape_im2col.set(2, 1); TensorInfo im2col_reshaped_info(shape_im2col, 1, dt, input->fixed_point_position()); im2col_reshaped_info.set_quantization_info(input->quantization_info()); - CLIm2ColKernel::validate(input, &im2col_reshaped_info, Size2D(kernel_width, kernel_height), conv_info, append_bias); + CLIm2ColKernel::validate(input, &im2col_reshaped_info, Size2D(kernel_width, kernel_height), conv_info, append_bias, dilation); // Create GEMM output tensor TensorShape shape_gemm = im2col_reshaped_info.tensor_shape(); diff --git a/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp b/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp index f4c073668a..c2b7e02284 100644 --- a/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp +++ b/src/runtime/GLES_COMPUTE/functions/GCConvolutionLayer.cpp @@ -102,7 +102,8 @@ void GCConvolutionLayer::configure_mm(const IGCTensor *input, const IGCTensor *w _mm_kernel.configure(input, weights, output, 1.f, is_interleaved_transposed); } -void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info) +void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weights, const IGCTensor *biases, IGCTensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, + const Size2D &dilation) { 
ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); @@ -136,7 +137,7 @@ void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weig const unsigned int kernel_width = (_are_weights_reshaped) ? weights_info.kernel_size().first : weights->info()->dimension(0); const unsigned int kernel_height = (_are_weights_reshaped) ? weights_info.kernel_size().second : weights->info()->dimension(1); std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), kernel_width, kernel_height, - conv_info); + conv_info, dilation); // Check if its a "fully connected" convolution _is_fully_connected_convolution = ((conv_w == 1) && (conv_h == 1)); @@ -229,7 +230,7 @@ void GCConvolutionLayer::configure(const IGCTensor *input, const IGCTensor *weig input->info()->extend_padding(border_size); _fill_border.configure(input, border_size, BorderMode::CONSTANT, PixelValue(0)); // for PAD of im2col fp16: consider it as border } - _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _append_bias); + _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _append_bias, dilation); // Configure matrix multiply if(run_interleaved) diff --git a/src/runtime/NEON/functions/NEConvolutionLayer.cpp b/src/runtime/NEON/functions/NEConvolutionLayer.cpp index d4421e8429..e659495b7c 100644 --- a/src/runtime/NEON/functions/NEConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEConvolutionLayer.cpp @@ -40,14 +40,15 @@ NEConvolutionLayer::NEConvolutionLayer(std::shared_ptr memory_ma { } -void NEConvolutionLayer::configure(ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info) +void NEConvolutionLayer::configure(ITensor *input, const ITensor *weights, const 
ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, + const Size2D &dilation) { // Perform validate step ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); - ARM_COMPUTE_ERROR_THROW_ON(NEConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info)); + ARM_COMPUTE_ERROR_THROW_ON(NEConvolutionLayer::validate(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, weights_info, dilation)); switch(NEConvolutionLayer::get_convolution_method(input->info(), weights->info(), ((biases != nullptr) ? biases->info() : nullptr), output->info(), conv_info, - weights_info)) + weights_info, dilation)) { case ConvolutionMethod::WINOGRAD: { @@ -59,7 +60,7 @@ void NEConvolutionLayer::configure(ITensor *input, const ITensor *weights, const case ConvolutionMethod::GEMM: { auto f = arm_compute::support::cpp14::make_unique(_memory_manager); - f->configure(input, weights, biases, output, conv_info, weights_info); + f->configure(input, weights, biases, output, conv_info, weights_info, dilation); _function = std::move(f); break; } @@ -77,9 +78,9 @@ void NEConvolutionLayer::configure(ITensor *input, const ITensor *weights, const } Status NEConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info) + const WeightsInfo &weights_info, const Size2D &dilation) { - switch(NEConvolutionLayer::get_convolution_method(input, weights, biases, output, conv_info, weights_info)) + switch(NEConvolutionLayer::get_convolution_method(input, weights, biases, output, conv_info, weights_info, dilation)) { case ConvolutionMethod::WINOGRAD: //Validate Winograd @@ -87,7 +88,7 @@ Status NEConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo break; case 
ConvolutionMethod::GEMM: //Validate Gemm-based Convolution - NEGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info); + NEGEMMConvolutionLayer::validate(input, weights, biases, output, conv_info, weights_info, dilation); break; case ConvolutionMethod::DIRECT: //Validate Gemm-based Convolution @@ -101,12 +102,12 @@ Status NEConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo } ConvolutionMethod NEConvolutionLayer::get_convolution_method(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info) + const WeightsInfo &weights_info, const Size2D &dilation) { ARM_COMPUTE_UNUSED(output); ARM_COMPUTE_UNUSED(weights_info); if((input->data_type() == DataType::F32) && (weights->dimension(0) == 3) && (weights->dimension(1) == 3) && (weights->num_dimensions() <= 4) && (conv_info.stride().first == 1) - && (conv_info.stride().second == 1) && (biases != nullptr)) + && (conv_info.stride().second == 1) && (biases != nullptr) && (dilation == Size2D(1U, 1U))) { return ConvolutionMethod::WINOGRAD; } diff --git a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp index 3b8b4243e5..d9707d95e0 100644 --- a/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp +++ b/src/runtime/NEON/functions/NEGEMMConvolutionLayer.cpp @@ -170,7 +170,7 @@ Status validate_and_initialize_values(const ITensorInfo *input, const ITensorInf bool &are_weights_reshaped, unsigned int &kernel_width, unsigned int &kernel_height, bool &is_fully_connected_convolution, bool &is_interleaved, bool &is_quantized, unsigned int &mat_weights_cols, unsigned int &mat_weights_rows, - unsigned int &conv_w, unsigned int &conv_h) + unsigned int &conv_w, unsigned int &conv_h, const Size2D &dilation) { ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QS8, DataType::QASYMM8, 
DataType::QS16, DataType::F16, DataType::F32); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); @@ -205,7 +205,7 @@ Status validate_and_initialize_values(const ITensorInfo *input, const ITensorInf mat_weights_rows = weights->dimension(0) * weights->dimension(1) * weights->dimension(2) + (append_bias ? 1 : 0); std::tie(conv_w, conv_h) = scaled_dimensions(input->dimension(0), input->dimension(1), kernel_width, kernel_height, - conv_info); + conv_info, dilation); // Check if its a "fully connected" convolution is_fully_connected_convolution = ((conv_w == 1) && (conv_h == 1)); @@ -246,7 +246,8 @@ void NEGEMMConvolutionLayer::configure_mm(const ITensor *input, const ITensor *w } } -void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info) +void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weights, const ITensor *biases, ITensor *output, const PadStrideInfo &conv_info, const WeightsInfo &weights_info, + const Size2D &dilation) { // Perform validate step ARM_COMPUTE_ERROR_ON_NULLPTR(input, weights, output); @@ -262,7 +263,7 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig Status status = validate_and_initialize_values(input->info(), weights->info(), (biases == nullptr) ? 
nullptr : biases->info(), conv_info, weights_info, dt, _append_bias, _are_weights_reshaped, kernel_width, kernel_height, _is_fully_connected_convolution, _is_interleaved, _is_quantized, - mat_weights_cols, mat_weights_rows, conv_w, conv_h); + mat_weights_cols, mat_weights_rows, conv_w, conv_h, dilation); ARM_COMPUTE_ERROR_THROW_ON(status); @@ -362,7 +363,7 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig // Configure kernels // Configure im2col - _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _append_bias); + _input_im2col_kernel.configure(input, &_input_im2col_reshaped, Size2D(kernel_width, kernel_height), conv_info, _append_bias, false, false, dilation); // Configure matrix multiply if(run_optimised) @@ -420,7 +421,7 @@ void NEGEMMConvolutionLayer::configure(const ITensor *input, const ITensor *weig } Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorInfo *weights, const ITensorInfo *biases, const ITensorInfo *output, const PadStrideInfo &conv_info, - const WeightsInfo &weights_info) + const WeightsInfo &weights_info, const Size2D &dilation) { ARM_COMPUTE_UNUSED(output); @@ -439,7 +440,7 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI Status status = validate_and_initialize_values(input, weights, biases, conv_info, weights_info, dt, append_bias, are_weights_reshaped, kernel_width, kernel_height, is_fully_connected_convolution, is_interleaved, is_quantized, mat_weights_cols, mat_weights_rows, - conv_w, conv_h); + conv_w, conv_h, dilation); const Size2D kernel_weights = Size2D(kernel_width, kernel_height); @@ -517,7 +518,7 @@ Status NEGEMMConvolutionLayer::validate(const ITensorInfo *input, const ITensorI shape_im2col.set(1, mat_input_rows); shape_im2col.set(2, 1); TensorInfo im2_col_info = input->clone()->set_tensor_shape(shape_im2col); - ARM_COMPUTE_RETURN_ON_ERROR(NEIm2ColKernel::validate(input, 
&im2_col_info, kernel_weights, conv_info, append_bias, false)); + ARM_COMPUTE_RETURN_ON_ERROR(NEIm2ColKernel::validate(input, &im2_col_info, kernel_weights, conv_info, append_bias, false, false, dilation)); // Create GEMM output tensor TensorShape shape_gemm(im2_col_info.tensor_shape()); diff --git a/tests/benchmark/CL/DirectConvolutionLayer.cpp b/tests/benchmark/CL/DirectConvolutionLayer.cpp index 27994b4458..c7b07807e9 100644 --- a/tests/benchmark/CL/DirectConvolutionLayer.cpp +++ b/tests/benchmark/CL/DirectConvolutionLayer.cpp @@ -27,7 +27,7 @@ #include "arm_compute/runtime/CL/CLTensorAllocator.h" #include "arm_compute/runtime/CL/functions/CLDirectConvolutionLayer.h" #include "tests/CL/CLAccessor.h" -#include "tests/benchmark/fixtures/ConvolutionLayerFixture.h" +#include "tests/benchmark/fixtures/DirectConvolutionLayerFixture.h" #include "tests/datasets/system_tests/alexnet/AlexNetConvolutionLayerDataset.h" #include "tests/datasets/system_tests/googlenet/inceptionv1/GoogLeNetInceptionV1ConvolutionLayerDataset.h" #include "tests/datasets/system_tests/googlenet/inceptionv4/GoogLeNetInceptionV4ConvolutionLayerDataset.h" @@ -49,7 +49,7 @@ namespace const auto data_types = framework::dataset::make("DataType", { DataType::F16, DataType::F32 }); } // namespace -using CLConvolutionLayerFixture = ConvolutionLayerFixture; +using CLConvolutionLayerFixture = DirectConvolutionLayerFixture; TEST_SUITE(CL) diff --git a/tests/benchmark/GLES_COMPUTE/DirectConvolutionLayer.cpp b/tests/benchmark/GLES_COMPUTE/DirectConvolutionLayer.cpp index 784f8e85ef..d319c41088 100644 --- a/tests/benchmark/GLES_COMPUTE/DirectConvolutionLayer.cpp +++ b/tests/benchmark/GLES_COMPUTE/DirectConvolutionLayer.cpp @@ -27,7 +27,7 @@ #include "arm_compute/runtime/GLES_COMPUTE/GCTensorAllocator.h" #include "arm_compute/runtime/GLES_COMPUTE/functions/GCDirectConvolutionLayer.h" #include "tests/GLES_COMPUTE/GCAccessor.h" -#include "tests/benchmark/fixtures/ConvolutionLayerFixture.h" +#include 
"tests/benchmark/fixtures/DirectConvolutionLayerFixture.h" #include "tests/datasets/system_tests/alexnet/AlexNetConvolutionLayerDataset.h" #include "tests/datasets/system_tests/googlenet/inceptionv1/GoogLeNetInceptionV1ConvolutionLayerDataset.h" #include "tests/datasets/system_tests/googlenet/inceptionv4/GoogLeNetInceptionV4ConvolutionLayerDataset.h" @@ -49,7 +49,7 @@ namespace const auto data_types = framework::dataset::make("DataType", { DataType::F32, DataType::F16 }); } // namespace -using GCConvolutionLayerFixture = ConvolutionLayerFixture; +using GCConvolutionLayerFixture = DirectConvolutionLayerFixture; TEST_SUITE(GC) diff --git a/tests/benchmark/NEON/ConvolutionLayer.cpp b/tests/benchmark/NEON/ConvolutionLayer.cpp index 9914d08c72..a425d95a6e 100644 --- a/tests/benchmark/NEON/ConvolutionLayer.cpp +++ b/tests/benchmark/NEON/ConvolutionLayer.cpp @@ -29,6 +29,7 @@ #include "arm_compute/runtime/TensorAllocator.h" #include "tests/NEON/Accessor.h" #include "tests/benchmark/fixtures/ConvolutionLayerFixture.h" +#include "tests/benchmark/fixtures/WinogradLayerFixture.h" #include "tests/datasets/system_tests/alexnet/AlexNetConvolutionLayerDataset.h" #include "tests/datasets/system_tests/googlenet/inceptionv1/GoogLeNetInceptionV1ConvolutionLayerDataset.h" #include "tests/datasets/system_tests/googlenet/inceptionv4/GoogLeNetInceptionV4ConvolutionLayerDataset.h" @@ -61,7 +62,7 @@ using NEGEMMConvolutionLayerFixture = ConvolutionLayerFixture; +using NEWinogradLayerFixture = WinogradLayerFixture; REGISTER_FIXTURE_DATA_TEST_CASE(AlexNetWinogradLayer, NEWinogradLayerFixture, framework::DatasetMode::ALL, framework::dataset::combine(framework::dataset::combine(datasets::AlexNetWinogradLayerDataset(), framework::dataset::make("DataType", DataType::F32)), diff --git a/tests/benchmark/NEON/DirectConvolutionLayer.cpp b/tests/benchmark/NEON/DirectConvolutionLayer.cpp index 67b948568f..8a17f3c03c 100644 --- a/tests/benchmark/NEON/DirectConvolutionLayer.cpp +++ 
b/tests/benchmark/NEON/DirectConvolutionLayer.cpp @@ -27,7 +27,7 @@ #include "arm_compute/runtime/Tensor.h" #include "arm_compute/runtime/TensorAllocator.h" #include "tests/NEON/Accessor.h" -#include "tests/benchmark/fixtures/ConvolutionLayerFixture.h" +#include "tests/benchmark/fixtures/DirectConvolutionLayerFixture.h" #include "tests/datasets/DirectConvolutionLayerDataset.h" #include "tests/datasets/system_tests/alexnet/AlexNetConvolutionLayerDataset.h" #include "tests/datasets/system_tests/googlenet/inceptionv1/GoogLeNetInceptionV1ConvolutionLayerDataset.h" @@ -55,7 +55,7 @@ const auto data_types = framework::dataset::make("DataType", { DataType::F32 }); #endif /* ARM_COMPUTE_ENABLE_F16 */ } // namespace -using NEConvolutionLayerFixture = ConvolutionLayerFixture; +using NEConvolutionLayerFixture = DirectConvolutionLayerFixture; TEST_SUITE(NEON) diff --git a/tests/benchmark/fixtures/ConvolutionLayerFixture.h b/tests/benchmark/fixtures/ConvolutionLayerFixture.h index 9815040d42..7558b4c9a8 100644 --- a/tests/benchmark/fixtures/ConvolutionLayerFixture.h +++ b/tests/benchmark/fixtures/ConvolutionLayerFixture.h @@ -42,7 +42,7 @@ class ConvolutionLayerFixture : public framework::Fixture { public: template - void setup(TensorShape src_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape dst_shape, PadStrideInfo info, DataType data_type, int batches) + void setup(TensorShape src_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape dst_shape, PadStrideInfo info, Size2D dilation, DataType data_type, int batches) { // Set batched in source and destination shapes const unsigned int fixed_point_position = 4; @@ -57,7 +57,7 @@ public: dst = create_tensor(dst_shape, data_type, 1, fixed_point_position); // Create and configure function - conv_layer.configure(&src, &weights, &biases, &dst, info); + conv_layer.configure(&src, &weights, &biases, &dst, info, WeightsInfo(), dilation); // Allocate tensors src.allocator()->allocate(); diff --git 
a/tests/benchmark/fixtures/DirectConvolutionLayerFixture.h b/tests/benchmark/fixtures/DirectConvolutionLayerFixture.h new file mode 100644 index 0000000000..e3289b7fbc --- /dev/null +++ b/tests/benchmark/fixtures/DirectConvolutionLayerFixture.h @@ -0,0 +1,101 @@ +/* + * Copyright (c) 2017-2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_TEST_DIRECTCONVOLUTIONLAYERFIXTURE +#define ARM_COMPUTE_TEST_DIRECTCONVOLUTIONLAYERFIXTURE + +#include "arm_compute/core/Error.h" +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "tests/Globals.h" +#include "tests/Utils.h" +#include "tests/framework/Fixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace benchmark +{ +/** Fixture that can be used for NEON and CL */ +template +class DirectConvolutionLayerFixture : public framework::Fixture +{ +public: + template + void setup(TensorShape src_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape dst_shape, PadStrideInfo info, Size2D dilation, DataType data_type, int batches) + { + ARM_COMPUTE_UNUSED(dilation); + + // Set batched in source and destination shapes + const unsigned int fixed_point_position = 4; + src_shape.set(3 /* batch */, batches); + dst_shape.set(3 /* batch */, batches); + DataType bias_data_type = is_data_type_quantized_asymmetric(data_type) ? 
DataType::S32 : data_type; + + // Create tensors + src = create_tensor(src_shape, data_type, 1, fixed_point_position); + weights = create_tensor(weights_shape, data_type, 1, fixed_point_position); + biases = create_tensor(biases_shape, bias_data_type, 1, fixed_point_position); + dst = create_tensor(dst_shape, data_type, 1, fixed_point_position); + + // Create and configure function + conv_layer.configure(&src, &weights, &biases, &dst, info); + + // Allocate tensors + src.allocator()->allocate(); + weights.allocator()->allocate(); + biases.allocator()->allocate(); + dst.allocator()->allocate(); + } + + void run() + { + conv_layer.run(); + } + + void sync() + { + sync_if_necessary(); + sync_tensor_if_necessary(dst); + } + + void teardown() + { + src.allocator()->free(); + weights.allocator()->free(); + biases.allocator()->free(); + dst.allocator()->free(); + } + +private: + TensorType src{}; + TensorType weights{}; + TensorType biases{}; + TensorType dst{}; + Function conv_layer{}; +}; +} // namespace benchmark +} // namespace test +} // namespace arm_compute +#endif /* ARM_COMPUTE_TEST_DIRECTCONVOLUTIONLAYERFIXTURE */ diff --git a/tests/benchmark/fixtures/WinogradLayerFixture.h b/tests/benchmark/fixtures/WinogradLayerFixture.h new file mode 100644 index 0000000000..31a1eb8e1c --- /dev/null +++ b/tests/benchmark/fixtures/WinogradLayerFixture.h @@ -0,0 +1,100 @@ +/* + * Copyright (c) 2018 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_TEST_WINOGRADLAYERFIXTURE +#define ARM_COMPUTE_TEST_WINOGRADLAYERFIXTURE + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "tests/Globals.h" +#include "tests/Utils.h" +#include "tests/framework/Fixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace benchmark +{ +/** Fixture that can be used for NEON and CL */ +template +class WinogradLayerFixture : public framework::Fixture +{ +public: + template + void setup(TensorShape src_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape dst_shape, PadStrideInfo info, Size2D dilation, DataType data_type, int batches) + { + ARM_COMPUTE_UNUSED(dilation); + + // Set batched in source and destination shapes + const unsigned int fixed_point_position = 4; + src_shape.set(3 /* batch */, batches); + dst_shape.set(3 /* batch */, batches); + DataType bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type; + + // Create tensors + src = create_tensor(src_shape, data_type, 1, fixed_point_position); + weights = create_tensor(weights_shape, data_type, 1, fixed_point_position); + biases = create_tensor(biases_shape, bias_data_type, 1, fixed_point_position); + dst = create_tensor(dst_shape, data_type, 1, fixed_point_position); + + // Create and configure function + conv_layer.configure(&src, &weights, &biases, &dst, info); + + // Allocate tensors + src.allocator()->allocate(); + weights.allocator()->allocate(); + biases.allocator()->allocate(); + dst.allocator()->allocate(); + } + + void run() + { + conv_layer.run(); + } + + void sync() + { + sync_if_necessary(); + sync_tensor_if_necessary(dst); + } + + void teardown() + { + src.allocator()->free(); + weights.allocator()->free(); + biases.allocator()->free(); + dst.allocator()->free(); + } + +private: + TensorType src{}; + TensorType weights{}; + TensorType biases{}; + TensorType dst{}; + Function conv_layer{}; +}; +} // namespace benchmark +} // namespace 
test +} // namespace arm_compute +#endif /* ARM_COMPUTE_TEST_WINOGRADLAYERFIXTURE */ diff --git a/tests/datasets/ConvolutionLayerDataset.h b/tests/datasets/ConvolutionLayerDataset.h index 6e2d2a142e..2981994555 100644 --- a/tests/datasets/ConvolutionLayerDataset.h +++ b/tests/datasets/ConvolutionLayerDataset.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -38,7 +38,7 @@ namespace datasets class ConvolutionLayerDataset { public: - using type = std::tuple; + using type = std::tuple; struct iterator { @@ -46,12 +46,14 @@ public: std::vector::const_iterator weights_it, std::vector::const_iterator biases_it, std::vector::const_iterator dst_it, - std::vector::const_iterator infos_it) + std::vector::const_iterator infos_it, + std::vector::const_iterator dilation_it) : _src_it{ std::move(src_it) }, _weights_it{ std::move(weights_it) }, _biases_it{ std::move(biases_it) }, _dst_it{ std::move(dst_it) }, - _infos_it{ std::move(infos_it) } + _infos_it{ std::move(infos_it) }, + _dilation_it{ std::move(dilation_it) } { } @@ -62,13 +64,14 @@ public: description << "Weights=" << *_weights_it << ":"; description << "Biases=" << *_biases_it << ":"; description << "Out=" << *_dst_it << ":"; - description << "Info=" << *_infos_it; + description << "Info=" << *_infos_it << ":"; + description << "Dilation=" << *_dilation_it; return description.str(); } ConvolutionLayerDataset::type operator*() const { - return std::make_tuple(*_src_it, *_weights_it, *_biases_it, *_dst_it, *_infos_it); + return std::make_tuple(*_src_it, *_weights_it, *_biases_it, *_dst_it, *_infos_it, *_dilation_it); } iterator &operator++() @@ -78,6 +81,7 @@ public: ++_biases_it; ++_dst_it; ++_infos_it; + ++_dilation_it; return *this; } @@ -88,25 +92,27 @@ public: std::vector::const_iterator _biases_it; std::vector::const_iterator _dst_it; std::vector::const_iterator _infos_it; + std::vector::const_iterator _dilation_it; }; iterator 
begin() const { - return iterator(_src_shapes.begin(), _weight_shapes.begin(), _bias_shapes.begin(), _dst_shapes.begin(), _infos.begin()); + return iterator(_src_shapes.begin(), _weight_shapes.begin(), _bias_shapes.begin(), _dst_shapes.begin(), _infos.begin(), _dilations.begin()); } int size() const { - return std::min(_src_shapes.size(), std::min(_weight_shapes.size(), std::min(_bias_shapes.size(), std::min(_dst_shapes.size(), _infos.size())))); + return std::min(_src_shapes.size(), std::min(_weight_shapes.size(), std::min(_bias_shapes.size(), std::min(_dst_shapes.size(), std::min(_infos.size(), _dilations.size()))))); } - void add_config(TensorShape src, TensorShape weights, TensorShape biases, TensorShape dst, PadStrideInfo info) + void add_config(TensorShape src, TensorShape weights, TensorShape biases, TensorShape dst, PadStrideInfo info, Size2D dilation = Size2D(1U, 1U)) { _src_shapes.emplace_back(std::move(src)); _weight_shapes.emplace_back(std::move(weights)); _bias_shapes.emplace_back(std::move(biases)); _dst_shapes.emplace_back(std::move(dst)); _infos.emplace_back(std::move(info)); + _dilations.emplace_back(std::move(dilation)); } protected: @@ -119,6 +125,7 @@ private: std::vector _bias_shapes{}; std::vector _dst_shapes{}; std::vector _infos{}; + std::vector _dilations{}; }; } // namespace datasets } // namespace test diff --git a/tests/datasets/DilatedConvolutionLayerDataset.h b/tests/datasets/DilatedConvolutionLayerDataset.h new file mode 100644 index 0000000000..3bbf282a31 --- /dev/null +++ b/tests/datasets/DilatedConvolutionLayerDataset.h @@ -0,0 +1,81 @@ +/* + * Copyright (c) 2018 ARM Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#ifndef ARM_COMPUTE_TEST_DILATED_CONVOLUTION_LAYER_DATASET +#define ARM_COMPUTE_TEST_DILATED_CONVOLUTION_LAYER_DATASET + +#include "utils/TypePrinter.h" + +#include "arm_compute/core/TensorShape.h" +#include "arm_compute/core/Types.h" +#include "tests/datasets/ConvolutionLayerDataset.h" + +namespace arm_compute +{ +namespace test +{ +namespace datasets +{ +class TinyDilatedConvolutionLayerDataset final : public ConvolutionLayerDataset +{ +public: + TinyDilatedConvolutionLayerDataset() + { + // Batch size 1 + add_config(TensorShape(23U, 27U, 5U), TensorShape(3U, 3U, 5U, 21U), TensorShape(21U), TensorShape(10U, 23U, 21U), PadStrideInfo(2, 1, 0, 0), Size2D(2U, 2U)); + add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 5U, 7U, 16U), TensorShape(16U), TensorShape(11U, 10U, 16U), PadStrideInfo(3, 2, 1, 0), Size2D(1U, 2U)); + // Batch size 4 + add_config(TensorShape(17U, 31U, 2U, 4U), TensorShape(5U, 5U, 2U, 19U), TensorShape(19U), TensorShape(11U, 13U, 19U, 4U), PadStrideInfo(1, 2, 1, 1), Size2D(2U, 2U)); + } +}; + +class SmallDilatedConvolutionLayerDataset final : public ConvolutionLayerDataset +{ +public: + SmallDilatedConvolutionLayerDataset() + { + add_config(TensorShape(23U, 27U, 5U), TensorShape(3U, 3U, 5U, 21U), TensorShape(21U), TensorShape(10U, 23U, 21U), PadStrideInfo(2, 1, 0, 0), Size2D(2U, 2U)); + add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 5U, 7U, 16U), TensorShape(16U), TensorShape(11U, 10U, 16U), PadStrideInfo(3, 2, 1, 0), Size2D(1U, 2U)); + add_config(TensorShape(17U, 31U, 2U), TensorShape(5U, 5U, 2U, 19U), TensorShape(19U), TensorShape(11U, 15U, 19U), PadStrideInfo(1, 2, 1, 1), Size2D(2U, 1U)); + add_config(TensorShape(23U, 27U, 5U), TensorShape(3U, 1U, 5U, 21U), TensorShape(21U), TensorShape(9U, 27U, 21U), PadStrideInfo(2, 1, 0, 0), Size2D(3U, 1U)); + add_config(TensorShape(17U, 31U, 2U), TensorShape(5U, 3U, 2U, 19U), TensorShape(19U), TensorShape(11U, 15U, 19U), PadStrideInfo(1, 2, 1, 1), Size2D(2U, 2U)); + } +}; + +class 
LargeDilatedConvolutionLayerDataset final : public ConvolutionLayerDataset +{ +public: + LargeDilatedConvolutionLayerDataset() + { + // Batch size 1 + add_config(TensorShape(27U, 27U, 96U), TensorShape(5U, 5U, 96U, 256U), TensorShape(256U), TensorShape(15U, 15U, 256U), PadStrideInfo(1, 1, 2, 2), Size2D(4U, 4U)); + add_config(TensorShape(13U, 13U, 256U), TensorShape(3U, 3U, 256U, 384U), TensorShape(384U), TensorShape(11U, 9U, 384U), PadStrideInfo(1, 1, 1, 1), Size2D(2U, 3U)); + add_config(TensorShape(13U, 13U, 384U), TensorShape(3U, 3U, 384U, 384U), TensorShape(384U), TensorShape(9U, 11U, 384U), PadStrideInfo(1, 1, 1, 1), Size2D(3U, 2U)); + add_config(TensorShape(13U, 13U, 384U), TensorShape(3U, 3U, 384U, 256U), TensorShape(256U), TensorShape(7U, 7U, 256U), PadStrideInfo(1, 1, 1, 1), Size2D(4U, 4U)); + add_config(TensorShape(224U, 224U, 3U), TensorShape(7U, 7U, 3U, 64U), TensorShape(64U), TensorShape(109U, 112U, 64U), PadStrideInfo(2, 2, 3, 3), Size2D(2U, 1U)); + } +}; +} // namespace datasets +} // namespace test +} // namespace arm_compute +#endif /* ARM_COMPUTE_TEST_DILATED_CONVOLUTION_LAYER_DATASET */ diff --git a/tests/validation/CL/ConvolutionLayer.cpp b/tests/validation/CL/ConvolutionLayer.cpp index 98d00ac47a..c50519b6ac 100644 --- a/tests/validation/CL/ConvolutionLayer.cpp +++ b/tests/validation/CL/ConvolutionLayer.cpp @@ -117,8 +117,10 @@ TEST_SUITE_END() TEST_SUITE(GEMMConvolutionLayer) DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallConvolutionLayerDataset(), datasets::LargeConvolutionLayerDataset()), CNNDataTypes), - input_shape, weights_shape, bias_shape, output_shape, info, data_type) + input_shape, weights_shape, bias_shape, output_shape, info, dilation, data_type) { + ARM_COMPUTE_UNUSED(dilation); + // Set fixed point position data type allowed int fixed_point_position = is_data_type_fixed_point(data_type) ? 
3 : 0; diff --git a/tests/validation/CL/DilatedConvolutionLayer.cpp b/tests/validation/CL/DilatedConvolutionLayer.cpp new file mode 100644 index 0000000000..dadae2227b --- /dev/null +++ b/tests/validation/CL/DilatedConvolutionLayer.cpp @@ -0,0 +1,289 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/CL/CLTensor.h" +#include "arm_compute/runtime/CL/CLTensorAllocator.h" +#include "arm_compute/runtime/CL/functions/CLConvolutionLayer.h" +#include "arm_compute/runtime/CL/functions/CLGEMMConvolutionLayer.h" +#include "tests/CL/CLAccessor.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/DilatedConvolutionLayerDataset.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/ConvolutionLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +RelativeTolerance tolerance_f32(0.05f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ +RelativeTolerance tolerance_f16(half_float::half(0.2)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ +constexpr AbsoluteTolerance tolerance_fixed(1.0f); /**< Tolerance value for comparing reference's output against implementation's output for fixed point data types */ +constexpr AbsoluteTolerance tolerance_qasymm8(0.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */ +constexpr float tolerance_num = 0.07f; /**< Tolerance number */ + +/** CNN data types */ +const auto CNNDataTypes = framework::dataset::make("DataType", +{ + DataType::F16, + DataType::F32, + DataType::QS8, + DataType::QS16, + DataType::QASYMM8, +}); +} // namespace + +TEST_SUITE(CL) +TEST_SUITE(DilatedConvolutionLayer) + +DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip(zip( + framework::dataset::make("InputInfo", { TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32, 0), + TensorInfo(TensorShape(17U, 31U, 2U), 1, DataType::F32, 0), + TensorInfo(TensorShape(23U, 27U, 
5U, 4U), 1, DataType::F32, 0), + TensorInfo(TensorShape(3U, 3U, 2U, 1U), 1, DataType::F32, 0), + TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32, 0) + }), + framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(5U, 5U, 2U, 19U), 1, DataType::F32, 0), + TensorInfo(TensorShape(5U, 5U, 2U, 19U), 1, DataType::F32, 0), + TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32, 0), + TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32, 0), + TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16, 0) + })), + framework::dataset::make("BiasesInfo", { TensorInfo(TensorShape(19U), 1, DataType::F32, 0), + TensorInfo(TensorShape(19U), 1, DataType::F32, 0), + TensorInfo(TensorShape(21U), 1, DataType::F32, 0), + TensorInfo(TensorShape(21U), 1, DataType::F32, 0), + TensorInfo(TensorShape(16U), 1, DataType::F32, 0) + })), + framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32, 0), + TensorInfo(TensorShape(15U, 15U, 19U), 1, DataType::F32, 0), + TensorInfo(TensorShape(21U, 25U, 21U, 4U), 1, DataType::F32, 0), + TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32, 0), + TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32, 0) + })), + framework::dataset::make("ConvInfo", { PadStrideInfo(1, 2, 1, 1), + PadStrideInfo(1, 2, 1, 1), + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(2, 1, 0, 0), + PadStrideInfo(3, 2, 1, 0) + })), + framework::dataset::make("GpuTarget", { GPUTarget::BIFROST, + GPUTarget::MIDGARD, + GPUTarget::G71, + GPUTarget::MIDGARD, + GPUTarget::BIFROST + })), + framework::dataset::make("Dilation", { Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(1U, 1U), + Size2D(2U, 2U), + Size2D(3U, 3U) + })), + + framework::dataset::make("Expected", { ConvolutionMethod::GEMM, ConvolutionMethod::GEMM, ConvolutionMethod::GEMM, ConvolutionMethod::GEMM, ConvolutionMethod::GEMM })), + input_info, weights_info, biases_info, output_info, conv_info, gpu_target, dilation, expected) +{ + ConvolutionMethod is_valid 
= CLConvolutionLayer::get_convolution_method(&input_info.clone()->set_is_resizable(false), + &weights_info.clone()->set_is_resizable(false), + &biases_info.clone()->set_is_resizable(false), + &output_info.clone()->set_is_resizable(false), conv_info, WeightsInfo(), gpu_target, dilation); + ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); +} +TEST_SUITE_END() + +TEST_SUITE(GEMMDilatedConvolutionLayer) + +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallDilatedConvolutionLayerDataset(), datasets::LargeDilatedConvolutionLayerDataset()), + CNNDataTypes), + input_shape, weights_shape, bias_shape, output_shape, info, dilation, data_type) +{ + // Set fixed point position data type allowed + int fixed_point_position = is_data_type_fixed_point(data_type) ? 3 : 0; + + auto bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type; + + // Create tensors + CLTensor src = create_tensor(input_shape, data_type, 1, fixed_point_position, QuantizationInfo(2.f / 255.f, 127)); + CLTensor weights = create_tensor(weights_shape, data_type, 1, fixed_point_position, QuantizationInfo(2.f / 255.f, 127)); + CLTensor bias = create_tensor(bias_shape, bias_data_type, 1, fixed_point_position, QuantizationInfo(2.f / 255.f, 127)); + CLTensor dst = create_tensor(output_shape, data_type, 1, fixed_point_position, QuantizationInfo(2.f / 255.f, 127)); + + ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + + const QuantizationInfo src_quantization_info = src.info()->quantization_info(); + const QuantizationInfo weights_quantization_info = weights.info()->quantization_info(); + + // Create and configure function + 
CLGEMMConvolutionLayer conv; + conv.configure(&src, &weights, &bias, &dst, info, WeightsInfo(), dilation); + + // Validate valid region + const ValidRegion src_valid_region = shape_to_valid_region(input_shape); + const ValidRegion weights_valid_region = shape_to_valid_region(weights_shape); + const ValidRegion bias_valid_region = shape_to_valid_region(bias_shape); + const ValidRegion dst_valid_region = shape_to_valid_region(output_shape); + + validate(src.info()->valid_region(), src_valid_region); + validate(weights.info()->valid_region(), weights_valid_region); + validate(bias.info()->valid_region(), bias_valid_region); + validate(dst.info()->valid_region(), dst_valid_region); + + // Validate QuantizationInfo + ARM_COMPUTE_EXPECT(src.info()->quantization_info() == src_quantization_info, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(weights.info()->quantization_info() == weights_quantization_info, framework::LogLevel::ERRORS); + + // Validate padding + //TODO(COMPMID-415) Need to validate padding? 
+} + +template +using CLGEMMDilatedConvolutionLayerFixture = ConvolutionValidationFixture; + +TEST_SUITE(Float) +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMDilatedConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", + DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); +} +FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMDilatedConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", + DataType::F16))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); +} +TEST_SUITE_END() + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMDilatedConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", + DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMDilatedConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", + DataType::F32))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() +TEST_SUITE_END() + +template +using CLGEMMDilatedConvolutionLayerFixedPointFixture = ConvolutionValidationFixedPointFixture; + +TEST_SUITE(FixedPoint) +TEST_SUITE(QS8) +// We test for fixed point precision [4,6] +FIXTURE_DATA_TEST_CASE(RunTiny, 
CLGEMMDilatedConvolutionLayerFixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", + DataType::QS8)), + framework::dataset::make("FractionalBits", 4, 7))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_fixed); +} +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMDilatedConvolutionLayerFixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", + DataType::QS8)), + framework::dataset::make("FractionalBits", 4, 7))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_fixed); +} +TEST_SUITE_END() + +TEST_SUITE(QS16) +// Testing for fixed point position [1,14) +FIXTURE_DATA_TEST_CASE(RunTiny, CLGEMMDilatedConvolutionLayerFixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", + DataType::QS16)), + framework::dataset::make("FractionalBits", 1, 14))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_fixed); +} +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMDilatedConvolutionLayerFixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", + DataType::QS16)), + framework::dataset::make("FractionalBits", 1, 14))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_fixed); +} +TEST_SUITE_END() +TEST_SUITE_END() + +template +using CLGEMMDilatedConvolutionLayerQuantizedFixture = ConvolutionValidationQuantizedFixture; + +TEST_SUITE(Quantized) 
+TEST_SUITE(QASYMM8) +FIXTURE_DATA_TEST_CASE(RunSmall, CLGEMMDilatedConvolutionLayerQuantizedFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE(RunLarge, CLGEMMDilatedConvolutionLayerQuantizedFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 0) }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_qasymm8); +} +TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/CL/LocallyConnected.cpp b/tests/validation/CL/LocallyConnected.cpp index de79d60e7f..6387fcc64a 100644 --- a/tests/validation/CL/LocallyConnected.cpp +++ b/tests/validation/CL/LocallyConnected.cpp @@ -110,8 +110,10 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallLocallyConnectedDataset(), datasets::LargeLocallyConnectedDataset()), framework::dataset::make("DataType", DataType::F32)), - src_shape, weights_shape, bias_shape, dst_shape, info, data_type) + src_shape, weights_shape, bias_shape, dst_shape, info, dilation, data_type) { + ARM_COMPUTE_UNUSED(dilation); + // Create tensors CLTensor src = create_tensor(src_shape, data_type); CLTensor weights = create_tensor(weights_shape, 
data_type); diff --git a/tests/validation/GLES_COMPUTE/ConvolutionLayer.cpp b/tests/validation/GLES_COMPUTE/ConvolutionLayer.cpp index c66a1001e5..aaa0d159be 100644 --- a/tests/validation/GLES_COMPUTE/ConvolutionLayer.cpp +++ b/tests/validation/GLES_COMPUTE/ConvolutionLayer.cpp @@ -59,7 +59,7 @@ TEST_SUITE(GC) TEST_SUITE(ConvolutionLayer) DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallConvolutionLayerDataset(), datasets::LargeConvolutionLayerDataset()), CNNDataTypes), - input_shape, weights_shape, bias_shape, output_shape, info, data_type) + input_shape, weights_shape, bias_shape, output_shape, info, dilation, data_type) { // Set fixed point position data type allowed int fixed_point_position = is_data_type_fixed_point(data_type) ? 3 : 0; @@ -82,7 +82,7 @@ DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::da // Create and configure function GCConvolutionLayer conv; - conv.configure(&src, &weights, &bias, &dst, info); + conv.configure(&src, &weights, &bias, &dst, info, WeightsInfo(), dilation); // Validate valid region const ValidRegion src_valid_region = shape_to_valid_region(input_shape); diff --git a/tests/validation/NEON/ConvolutionLayer.cpp b/tests/validation/NEON/ConvolutionLayer.cpp index 34306b381c..313e4bc4d6 100644 --- a/tests/validation/NEON/ConvolutionLayer.cpp +++ b/tests/validation/NEON/ConvolutionLayer.cpp @@ -126,8 +126,10 @@ TEST_SUITE_END() TEST_SUITE(GEMMConvolutionLayer) DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallConvolutionLayerDataset(), datasets::LargeConvolutionLayerDataset()), CNNDataTypes), - input_shape, weights_shape, bias_shape, output_shape, info, data_type) + input_shape, weights_shape, bias_shape, output_shape, info, dilation, data_type) { + ARM_COMPUTE_UNUSED(dilation); + // Set fixed point position data type allowed int fixed_point_position = is_data_type_fixed_point(data_type) ? 
3 : 0; diff --git a/tests/validation/NEON/DilatedConvolutionLayer.cpp b/tests/validation/NEON/DilatedConvolutionLayer.cpp new file mode 100644 index 0000000000..c0a72ec8cb --- /dev/null +++ b/tests/validation/NEON/DilatedConvolutionLayer.cpp @@ -0,0 +1,274 @@ +/* + * Copyright (c) 2018 ARM Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/core/Types.h" +#include "arm_compute/runtime/NEON/functions/NEConvolutionLayer.h" +#include "arm_compute/runtime/NEON/functions/NEGEMMConvolutionLayer.h" +#include "arm_compute/runtime/Tensor.h" +#include "arm_compute/runtime/TensorAllocator.h" +#include "tests/NEON/Accessor.h" +#include "tests/PaddingCalculator.h" +#include "tests/datasets/DilatedConvolutionLayerDataset.h" +#include "tests/framework/Asserts.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" +#include "tests/validation/fixtures/ConvolutionLayerFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +namespace +{ +const AbsoluteTolerance tolerance_f32(0.001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +const AbsoluteTolerance tolerance_f16(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ +const AbsoluteTolerance tolerance_q(1.0f); /**< Tolerance value for comparing reference's output against implementation's output for fixed point data types */ +constexpr AbsoluteTolerance tolerance_qasymm8(0.0); /**< Tolerance value for comparing reference's output against implementation's output for quantized data types */ + +/** CNN data types */ +const auto CNNDataTypes = framework::dataset::make("DataType", +{ +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC + DataType::F16, +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ + DataType::F32, + DataType::QS8, + DataType::QS16, + DataType::QASYMM8, +}); +} // namespace + +TEST_SUITE(NEON) + +TEST_SUITE(DilatedConvolutionLayer) +DATA_TEST_CASE(ValidateConvolutionMethod, framework::DatasetMode::ALL, zip(zip(zip(zip(zip(zip( + framework::dataset::make("InputInfo", { TensorInfo(TensorShape(8U, 8U, 2U), 1, 
DataType::F32, 0), + TensorInfo(TensorShape(23U, 27U, 5U, 4U), 1, DataType::F32, 0), + TensorInfo(TensorShape(3U, 3U, 2U, 1U), 1, DataType::F32, 0), + TensorInfo(TensorShape(33U, 27U, 7U, 4U), 1, DataType::F32, 0) + }), + framework::dataset::make("WeightsInfo", { TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32, 0), + TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32, 0), + TensorInfo(TensorShape(3U, 3U, 5U, 21U), 1, DataType::F32, 0), + TensorInfo(TensorShape(5U, 5U, 7U, 16U), 1, DataType::F16, 0) + })), + framework::dataset::make("BiasesInfo", { TensorInfo(TensorShape(1U), 1, DataType::F32, 0), + TensorInfo(TensorShape(21U), 1, DataType::F32, 0), + TensorInfo(TensorShape(21U), 1, DataType::F32, 0), + TensorInfo(TensorShape(16U), 1, DataType::F32, 0) + })), + framework::dataset::make("OutputInfo", { TensorInfo(TensorShape(6U, 6U, 1U), 1, DataType::F32, 0), + TensorInfo(TensorShape(21U, 25U, 21U, 4U), 1, DataType::F32, 0), + TensorInfo(TensorShape(11U, 25U, 21U), 1, DataType::F32, 0), + TensorInfo(TensorShape(11U, 12U, 16U, 4U), 1, DataType::F32, 0) + })), + framework::dataset::make("ConvInfo", { PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(1, 1, 0, 0), + PadStrideInfo(2, 1, 0, 0), + PadStrideInfo(3, 2, 1, 0) + })), + framework::dataset::make("Dilation", { Size2D(1U, 2U), + Size2D(2U, 1U), + Size2D(2U, 2U), + Size2D(3U, 3U) + })), + framework::dataset::make("Expected", { ConvolutionMethod::GEMM, ConvolutionMethod::GEMM, ConvolutionMethod::GEMM, ConvolutionMethod::GEMM })), + input_info, weights_info, biases_info, output_info, conv_info, dilation, expected) +{ + ConvolutionMethod is_valid = NEConvolutionLayer::get_convolution_method(&input_info.clone()->set_is_resizable(false), + &weights_info.clone()->set_is_resizable(false), + &biases_info.clone()->set_is_resizable(false), + &output_info.clone()->set_is_resizable(false), + conv_info, WeightsInfo(), dilation); + ARM_COMPUTE_EXPECT(is_valid == expected, framework::LogLevel::ERRORS); +} 
+TEST_SUITE_END() + +TEST_SUITE(GEMMDilatedConvolutionLayer) + +DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallDilatedConvolutionLayerDataset(), datasets::LargeDilatedConvolutionLayerDataset()), + CNNDataTypes), + input_shape, weights_shape, bias_shape, output_shape, info, dilation, data_type) +{ + // Set fixed point position data type allowed + int fixed_point_position = is_data_type_fixed_point(data_type) ? 3 : 0; + + auto bias_data_type = is_data_type_quantized_asymmetric(data_type) ? DataType::S32 : data_type; + + // Create tensors + Tensor src = create_tensor(input_shape, data_type, 1, fixed_point_position, QuantizationInfo(2.f / 255.f, 127)); + Tensor weights = create_tensor(weights_shape, data_type, 1, fixed_point_position, QuantizationInfo(2.f / 255.f, 127)); + Tensor bias = create_tensor(bias_shape, bias_data_type, 1, fixed_point_position, QuantizationInfo(2.f / 255.f, 127)); + Tensor dst = create_tensor(output_shape, data_type, 1, fixed_point_position, QuantizationInfo(2.f / 255.f, 127)); + + ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(bias.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(dst.info()->is_resizable(), framework::LogLevel::ERRORS); + + const QuantizationInfo src_quantization_info = src.info()->quantization_info(); + const QuantizationInfo weights_quantization_info = weights.info()->quantization_info(); + + // Create and configure function + NEGEMMConvolutionLayer conv; + conv.configure(&src, &weights, &bias, &dst, info, WeightsInfo(), dilation); + + // Validate valid region + const ValidRegion src_valid_region = shape_to_valid_region(input_shape); + const ValidRegion weights_valid_region = shape_to_valid_region(weights_shape); + const ValidRegion bias_valid_region = shape_to_valid_region(bias_shape); + const ValidRegion 
dst_valid_region = shape_to_valid_region(output_shape); + + validate(src.info()->valid_region(), src_valid_region); + validate(weights.info()->valid_region(), weights_valid_region); + validate(bias.info()->valid_region(), bias_valid_region); + validate(dst.info()->valid_region(), dst_valid_region); + + // Validate QuantizationInfo + ARM_COMPUTE_EXPECT(src.info()->quantization_info() == src_quantization_info, framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(weights.info()->quantization_info() == weights_quantization_info, framework::LogLevel::ERRORS); + + // Validate padding + //TODO(COMPMID-415) Need to validate padding? +} + +template +using NEGEMMDilatedConvolutionLayerFixture = ConvolutionValidationFixture; + +TEST_SUITE(Float) +#ifdef __ARM_FEATURE_FP16_VECTOR_ARITHMETIC +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMDilatedConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true, false })), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMDilatedConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true, false })), + framework::dataset::make("DataType", DataType::F16))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f16); +} +TEST_SUITE_END() +#endif /* __ARM_FEATURE_FP16_VECTOR_ARITHMETIC */ + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMDilatedConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(datasets::SmallDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true, false })), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(Accessor(_target), 
_reference, tolerance_f32); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMDilatedConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(datasets::LargeDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true, false })), + framework::dataset::make("DataType", DataType::F32))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() +TEST_SUITE_END() + +template +using NEGEMMDilatedConvolutionLayerFixedPointFixture = ConvolutionValidationFixedPointFixture; + +TEST_SUITE(FixedPoint) +TEST_SUITE(QS8) +// We test for fixed point precision [4,6] +FIXTURE_DATA_TEST_CASE(RunTiny, NEGEMMDilatedConvolutionLayerFixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true, false })), + framework::dataset::make("DataType", DataType::QS8)), + framework::dataset::make("FractionalBits", 4, 7))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_q); +} +FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMDilatedConvolutionLayerFixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true, false })), + framework::dataset::make("DataType", DataType::QS8)), + framework::dataset::make("FractionalBits", 4, 7))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_q); +} +TEST_SUITE_END() + +TEST_SUITE(QS16) +// Testing for fixed point position [1,14) +FIXTURE_DATA_TEST_CASE(RunTiny, NEGEMMDilatedConvolutionLayerFixedPointFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::TinyDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true, false })), + framework::dataset::make("DataType", DataType::QS16)), + framework::dataset::make("FractionalBits", 1, 14))) +{ + // Validate output + 
validate(Accessor(_target), _reference, tolerance_q); +} +FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMDilatedConvolutionLayerFixedPointFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::SmallDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true, false })), + framework::dataset::make("DataType", DataType::QS16)), + framework::dataset::make("FractionalBits", 1, 14))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_q); +} +TEST_SUITE_END() +TEST_SUITE_END() + +template +using NEGEMMDilatedConvolutionLayerQuantizedFixture = ConvolutionValidationQuantizedFixture; + +TEST_SUITE(Quantized) +TEST_SUITE(QASYMM8) +FIXTURE_DATA_TEST_CASE(RunSmall, NEGEMMDilatedConvolutionLayerQuantizedFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +FIXTURE_DATA_TEST_CASE(RunLarge, NEGEMMDilatedConvolutionLayerQuantizedFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDilatedConvolutionLayerDataset(), + framework::dataset::make("ReshapeWeights", { true })), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) }))) +{ + // Validate output + validate(Accessor(_target), _reference, tolerance_qasymm8); +} +TEST_SUITE_END() +TEST_SUITE_END() + +TEST_SUITE_END() +TEST_SUITE_END() +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/NEON/LocallyConnected.cpp b/tests/validation/NEON/LocallyConnected.cpp index b00f27445c..0c36ff6c85 100644 --- a/tests/validation/NEON/LocallyConnected.cpp +++ 
b/tests/validation/NEON/LocallyConnected.cpp @@ -111,8 +111,10 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(zip( DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallLocallyConnectedDataset(), datasets::LargeLocallyConnectedDataset()), framework::dataset::make("DataType", DataType::F32)), - src_shape, weights_shape, bias_shape, dst_shape, info, data_type) + src_shape, weights_shape, bias_shape, dst_shape, info, dilation, data_type) { + ARM_COMPUTE_UNUSED(dilation); + // Create tensors Tensor src = create_tensor(src_shape, data_type); Tensor weights = create_tensor(weights_shape, data_type); diff --git a/tests/validation/fixtures/ConvolutionLayerFixture.h b/tests/validation/fixtures/ConvolutionLayerFixture.h index 48b4665fe7..6a100acef3 100644 --- a/tests/validation/fixtures/ConvolutionLayerFixture.h +++ b/tests/validation/fixtures/ConvolutionLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -54,7 +54,7 @@ public: public: template - void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, bool reshape_weights, + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, bool reshape_weights, DataType data_type, int fractional_bits, QuantizationInfo quantization_info) { _data_type = data_type; @@ -63,8 +63,8 @@ public: _fractional_bits = fractional_bits; _quantization_info = quantization_info; - _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, reshape_weights); - _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info); + _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, reshape_weights, dilation); + _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, dilation); } protected: @@ -98,7 +98,7 @@ protected: } TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info, - bool reshape_weights) + bool reshape_weights, const Size2D &dilation) { WeightsInfo weights_info(!reshape_weights, weights_shape.x(), weights_shape.y(), weights_shape[3]); TensorShape reshaped_weights_shape(weights_shape); @@ -144,7 +144,7 @@ protected: // Create and configure function FunctionType conv; - conv.configure(&src, &weights, &bias, &dst, info, weights_info); + conv.configure(&src, &weights, &bias, &dst, info, weights_info, dilation); ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS); @@ -220,7 +220,8 @@ protected: return dst; } - SimpleTensor compute_reference(const TensorShape &input_shape, const TensorShape 
&weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info) + SimpleTensor compute_reference(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &bias_shape, const TensorShape &output_shape, const PadStrideInfo &info, + const Size2D &dilation) { // Create reference SimpleTensor src{ input_shape, _data_type, 1, _fractional_bits, _quantization_info }; @@ -232,7 +233,7 @@ protected: fill(weights, 1); fill(bias, 2); - return reference::convolution_layer(src, weights, bias, output_shape, info); + return reference::convolution_layer(src, weights, bias, output_shape, info, dilation); } TensorType _target{}; @@ -293,9 +294,10 @@ class ConvolutionValidationFixture : public ConvolutionValidationGenericFixture< { public: template - void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, bool reshape_weights, DataType data_type) + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, bool reshape_weights, DataType data_type) { - ConvolutionValidationGenericFixture::setup(input_shape, weights_shape, bias_shape, output_shape, info, reshape_weights, data_type, 0, QuantizationInfo()); + ConvolutionValidationGenericFixture::setup(input_shape, weights_shape, bias_shape, output_shape, info, dilation, reshape_weights, data_type, 0, + QuantizationInfo()); } }; @@ -304,9 +306,10 @@ class ConvolutionValidationFixedPointFixture : public ConvolutionValidationGener { public: template - void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, bool reshape_weights, DataType data_type, int fractional_bits) + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, bool 
reshape_weights, DataType data_type, + int fractional_bits) { - ConvolutionValidationGenericFixture::setup(input_shape, weights_shape, bias_shape, output_shape, info, reshape_weights, data_type, fractional_bits, + ConvolutionValidationGenericFixture::setup(input_shape, weights_shape, bias_shape, output_shape, info, dilation, reshape_weights, data_type, fractional_bits, QuantizationInfo()); } }; @@ -316,10 +319,11 @@ class ConvolutionValidationQuantizedFixture : public ConvolutionValidationGeneri { public: template - void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, bool reshape_weights, DataType data_type, + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, bool reshape_weights, DataType data_type, QuantizationInfo quantization_info) { - ConvolutionValidationGenericFixture::setup(input_shape, weights_shape, bias_shape, output_shape, info, reshape_weights, data_type, 0, quantization_info); + ConvolutionValidationGenericFixture::setup(input_shape, weights_shape, bias_shape, output_shape, info, dilation, reshape_weights, data_type, 0, + quantization_info); } }; } // namespace validation diff --git a/tests/validation/fixtures/DirectConvolutionLayerFixture.h b/tests/validation/fixtures/DirectConvolutionLayerFixture.h index d63a5bcdba..fef9d2dc6e 100644 --- a/tests/validation/fixtures/DirectConvolutionLayerFixture.h +++ b/tests/validation/fixtures/DirectConvolutionLayerFixture.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. 
* * SPDX-License-Identifier: MIT * @@ -66,9 +66,11 @@ public: } template - void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, DataType data_type, int fractional_bits, QuantizationInfo quantization_info) { + ARM_COMPUTE_UNUSED(dilation); + _fractional_bits = fractional_bits; _quantization_info = quantization_info; _data_type = data_type; @@ -226,10 +228,10 @@ class DirectConvolutionValidationWithTensorShapesQuantizedFixture : public Direc { public: template - void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, DataType data_type, QuantizationInfo quantization_info) { - DirectConvolutionValidationGenericFixture::setup(input_shape, weights_shape, bias_shape, output_shape, info, data_type, 0, quantization_info); + DirectConvolutionValidationGenericFixture::setup(input_shape, weights_shape, bias_shape, output_shape, info, dilation, data_type, 0, quantization_info); } }; @@ -238,10 +240,10 @@ class DirectConvolutionValidationWithTensorShapesFixture : public DirectConvolut { public: template - void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, DataType data_type) { - DirectConvolutionValidationGenericFixture::setup(input_shape, weights_shape, bias_shape, output_shape, info, data_type, 0, QuantizationInfo()); + DirectConvolutionValidationGenericFixture::setup(input_shape, 
weights_shape, bias_shape, output_shape, info, dilation, data_type, 0, QuantizationInfo()); } }; diff --git a/tests/validation/fixtures/DirectConvolutionLayerTensorShiftFixture.h b/tests/validation/fixtures/DirectConvolutionLayerTensorShiftFixture.h index d810a765cb..09b6d830b4 100644 --- a/tests/validation/fixtures/DirectConvolutionLayerTensorShiftFixture.h +++ b/tests/validation/fixtures/DirectConvolutionLayerTensorShiftFixture.h @@ -67,9 +67,11 @@ public: } template - void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, unsigned int dilation_x, unsigned int dilation_y, DataType data_type, int fractional_bits, QuantizationInfo quantization_info) { + ARM_COMPUTE_UNUSED(dilation_x, dilation_y); + _fractional_bits = fractional_bits; _quantization_info = quantization_info; _data_type = data_type; @@ -245,10 +247,11 @@ class DirectConvolutionValidationWithTensorShapesQuantizedTensorShiftFixture : p { public: template - void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, unsigned int dilation_x, unsigned int dilation_y, DataType data_type, QuantizationInfo quantization_info) { - DirectConvolutionValidationGenericTensorShiftFixture::setup(input_shape, weights_shape, bias_shape, output_shape, info, data_type, 0, quantization_info); + DirectConvolutionValidationGenericTensorShiftFixture::setup(input_shape, weights_shape, bias_shape, output_shape, info, dilation_x, dilation_y, data_type, 0, + quantization_info); } }; @@ -257,10 +260,11 @@ class DirectConvolutionValidationWithTensorShapesTensorShiftFixture : public Dir { public: template - void 
setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, unsigned int dilation_x, unsigned int dilation_y, DataType data_type) { - DirectConvolutionValidationGenericTensorShiftFixture::setup(input_shape, weights_shape, bias_shape, output_shape, info, data_type, 0, QuantizationInfo()); + DirectConvolutionValidationGenericTensorShiftFixture::setup(input_shape, weights_shape, bias_shape, output_shape, info, dilation_x, dilation_y, data_type, 0, + QuantizationInfo()); } }; diff --git a/tests/validation/fixtures/LocallyConnectedFixture.h b/tests/validation/fixtures/LocallyConnectedFixture.h index ab9819e56f..f87e6e470c 100644 --- a/tests/validation/fixtures/LocallyConnectedFixture.h +++ b/tests/validation/fixtures/LocallyConnectedFixture.h @@ -24,6 +24,7 @@ #ifndef ARM_COMPUTE_TEST_LOCALLY_CONNECTED_FIXTURE #define ARM_COMPUTE_TEST_LOCALLY_CONNECTED_FIXTURE +#include "arm_compute/core/Error.h" #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" #include "arm_compute/runtime/NEON/NEScheduler.h" @@ -54,8 +55,10 @@ public: public: template - void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, DataType data_type) + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, DataType data_type) { + ARM_COMPUTE_UNUSED(dilation); + _data_type = data_type; _bias_data_type = data_type; diff --git a/tests/validation/fixtures/WinogradLayerFixture.h b/tests/validation/fixtures/WinogradLayerFixture.h index c427f8d20e..a86f24f35e 100644 --- a/tests/validation/fixtures/WinogradLayerFixture.h +++ b/tests/validation/fixtures/WinogradLayerFixture.h @@ -52,8 +52,10 @@ class 
WinogradConvolutionLayerValidationFixture : public framework::Fixture { public: template - void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, DataType data_type) + void setup(TensorShape input_shape, TensorShape weights_shape, TensorShape bias_shape, TensorShape output_shape, PadStrideInfo info, Size2D dilation, DataType data_type) { + ARM_COMPUTE_UNUSED(dilation); + _target = compute_target(input_shape, weights_shape, bias_shape, output_shape, info, data_type); _reference = compute_reference(input_shape, weights_shape, bias_shape, output_shape, info, data_type); } diff --git a/tests/validation/reference/Convolution3d.h b/tests/validation/reference/Convolution3d.h index b99d534635..700175880b 100644 --- a/tests/validation/reference/Convolution3d.h +++ b/tests/validation/reference/Convolution3d.h @@ -46,7 +46,7 @@ inline bool is_valid_pixel(int i, int min, int max) template < typename T, typename TB, typename std::enable_if < validation::is_floating_point::value &&validation::is_floating_point::value, int >::type = 0 > inline void convolution3d(const SimpleTensor &in, const SimpleTensor &weights, const SimpleTensor &bias, SimpleTensor &out, int i_offset, int w_offset, int b_offset, int o_offset, - int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights) + int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights, int dilation_x = 1, int dilation_y = 1) { const T *in_ptr = in.data() + i_offset; const T *w_ptr = weights.data() + w_offset; @@ -73,12 +73,12 @@ inline void convolution3d(const SimpleTensor &in, const SimpleTensor &weig for(int xk = -half_width_weights_start; xk <= half_width_weights_end; ++xk) { // Check if the pixel is out-of-bound - if(is_valid_pixel(xi + xk, 0, width_in) && is_valid_pixel(yi + yk, 0, height_in)) + if(is_valid_pixel(xi + xk * dilation_x, 0, width_in) && is_valid_pixel(yi + yk * 
dilation_y, 0, height_in)) { const int idx = xk + half_width_weights_start; const int idy = yk + half_height_weights_start; - const T i_value = in_ptr[offset_slice_in + xk + yk * width_in]; + const T i_value = in_ptr[offset_slice_in + xk * dilation_x + yk * dilation_y * width_in]; const T w_value = w_ptr[idx + idy * width_weights + ifm * width_weights * height_weights]; acc += i_value * w_value; @@ -95,7 +95,7 @@ inline void convolution3d(const SimpleTensor &in, const SimpleTensor &weig template < typename T, typename TB, typename std::enable_if < std::is_integral::value &&std::is_integral::value, int >::type = 0 > inline void convolution3d(const SimpleTensor &in, const SimpleTensor &weights, const SimpleTensor &bias, SimpleTensor &out, int i_offset, int w_offset, int b_offset, int o_offset, - int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights) + int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights, int dilation_x = 1, int dilation_y = 1) { const T *in_ptr = in.data() + i_offset; const T *w_ptr = weights.data() + w_offset; @@ -126,12 +126,12 @@ inline void convolution3d(const SimpleTensor &in, const SimpleTensor &weig for(int xk = -half_width_weights_start; xk <= half_width_weights_end; ++xk) { // Check if the pixel is out-of-bound - if(is_valid_pixel(xi + xk, 0, width_in) && is_valid_pixel(yi + yk, 0, height_in)) + if(is_valid_pixel(xi + xk * dilation_x, 0, width_in) && is_valid_pixel(yi + yk * dilation_y, 0, height_in)) { const int idx = xk + half_width_weights_start; const int idy = yk + half_height_weights_start; - const fixed_point i_value(in_ptr[offset_slice_in + xk + yk * width_in], fixed_point_position, true); + const fixed_point i_value(in_ptr[offset_slice_in + xk * dilation_x + yk * dilation_y * width_in], fixed_point_position, true); const fixed_point w_value(w_ptr[idx + idy * width_weights + ifm * width_weights * height_weights], fixed_point_position, true); const 
fixed_point iw = i_value * w_value; acc = iw + acc; @@ -153,7 +153,7 @@ inline void convolution3d(const SimpleTensor &in, const SimpleTensor &weig template <> inline void convolution3d(const SimpleTensor &in, const SimpleTensor &weights, const SimpleTensor &bias, SimpleTensor &out, int i_offset, int w_offset, int b_offset, int o_offset, - int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights) + int xi, int yi, int width_in, int height_in, int depth_in, int width_weights, int height_weights, int dilation_x, int dilation_y) { const uint8_t *in_ptr = in.data() + i_offset; const uint8_t *w_ptr = weights.data() + w_offset; @@ -192,12 +192,12 @@ inline void convolution3d(const SimpleTensor &in, const SimpleTensor -SimpleTensor convolution_layer(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const TensorShape &output_shape, const PadStrideInfo &info) +SimpleTensor convolution_layer(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const TensorShape &output_shape, const PadStrideInfo &info, + const Size2D &dilation) { // Create reference SimpleTensor dst{ output_shape, src.data_type(), 1, src.fixed_point_position(), src.quantization_info() }; @@ -66,10 +67,10 @@ SimpleTensor convolution_layer(const SimpleTensor &src, const SimpleTensor const int stride_xi = info.stride().first; const int stride_yi = info.stride().second; - auto output_wh = scaled_dimensions(width_in, height_in, width_weights, height_weights, info); + auto output_wh = scaled_dimensions(width_in, height_in, width_weights, height_weights, info, dilation); - const int start_xi = width_weights / 2 - pad_left; - const int start_yi = height_weights / 2 - pad_top; + const int start_xi = (dilation.x() * (width_weights - 1) + 1) / 2 - pad_left; + const int start_yi = (dilation.y() * (height_weights - 1) + 1) / 2 - pad_top; const int end_xi = output_wh.first * stride_xi; const int end_yi = output_wh.second * 
stride_yi; const int num_batches = src.shape().total_size() / (width_in * height_in * depth_in); @@ -96,7 +97,7 @@ SimpleTensor convolution_layer(const SimpleTensor &src, const SimpleTensor offset_in, ofm * width_weights * height_weights * depth_weights, ofm, offset_out, xi, yi, width_in, height_in, depth_in, - width_weights, height_weights); + width_weights, height_weights, dilation.x(), dilation.y()); } } } @@ -106,15 +107,15 @@ SimpleTensor convolution_layer(const SimpleTensor &src, const SimpleTensor } template SimpleTensor convolution_layer(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const TensorShape &output_shape, - const PadStrideInfo &info); + const PadStrideInfo &info, const Size2D &dilation); template SimpleTensor convolution_layer(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const TensorShape &output_shape, - const PadStrideInfo &info); + const PadStrideInfo &info, const Size2D &dilation); template SimpleTensor convolution_layer(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const TensorShape &output_shape, - const PadStrideInfo &info); + const PadStrideInfo &info, const Size2D &dilation); template SimpleTensor convolution_layer(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const TensorShape &output_shape, - const PadStrideInfo &info); + const PadStrideInfo &info, const Size2D &dilation); template SimpleTensor convolution_layer(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const TensorShape &output_shape, - const PadStrideInfo &info); + const PadStrideInfo &info, const Size2D &dilation); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/reference/ConvolutionLayer.h b/tests/validation/reference/ConvolutionLayer.h index 57455ba401..ff3b1531f4 100644 --- a/tests/validation/reference/ConvolutionLayer.h +++ 
b/tests/validation/reference/ConvolutionLayer.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017 ARM Limited. + * Copyright (c) 2017-2018 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -36,7 +36,8 @@ namespace validation namespace reference { template -SimpleTensor convolution_layer(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const TensorShape &output_shape, const PadStrideInfo &info); +SimpleTensor convolution_layer(const SimpleTensor &src, const SimpleTensor &weights, const SimpleTensor &bias, const TensorShape &output_shape, const PadStrideInfo &info, + const Size2D &dilation = Size2D(1U, 1U)); } // namespace reference } // namespace validation } // namespace test -- cgit v1.2.1