diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2017-10-23 20:29:30 +0100 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:35:24 +0000 |
commit | 81a26ad6b626ce2da83659d7c6c17b6104d1f203 (patch) | |
tree | 536807115771f1a4b06048892d1d4e17c98779de | |
parent | 511347a7282b948bddfc071e9a8fa08e79da25b4 (diff) | |
download | ComputeLibrary-81a26ad6b626ce2da83659d7c6c17b6104d1f203.tar.gz |
COMPMID-643: Add bias to CLDepthwiseConvolution.
Change-Id: Ibfe7b8c1172d10cbcae7971fe86b82090519d31d
Reviewed-on: http://mpd-gerrit.cambridge.arm.com/92798
Tested-by: Kaizen <jeremy.johnson+kaizengerrit@arm.com>
Reviewed-by: Jaroslaw Rzepecki <jaroslaw.rzepecki@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
23 files changed, 304 insertions, 147 deletions
diff --git a/arm_compute/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.h b/arm_compute/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.h index 4e69f551b8..b3d1eaf3af 100644 --- a/arm_compute/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.h +++ b/arm_compute/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.h @@ -49,10 +49,12 @@ public: * * @param[in] input Source tensor. DataType supported: F32. * @param[out] output Destination tensor. Data type supported: Same as @p input. - * @param[in] weights Weights tensor. These are 3D tensors with dimensions [3, 3, IFM]. Data type supported: Same as @p input. + * @param[in] weights Weights tensor. A 3D tensor with dimensions [3, 3, IFM]. Data type supported: Same as @p input. + * @param[in] biases (Optional) Biases tensor. A 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input. * @param[in] conv_info Padding and stride information to use for the convolution. */ - void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *weights, const PadStrideInfo &conv_info); + void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *weights, const ICLTensor *biases, const PadStrideInfo &conv_info); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; @@ -63,6 +65,7 @@ private: const ICLTensor *_input; ICLTensor *_output; const ICLTensor *_weights; + const ICLTensor *_biases; unsigned int _conv_stride_x; unsigned int _conv_stride_y; unsigned int _conv_pad_x; diff --git a/arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h index ae56adfa30..7e786e8df8 100644 --- a/arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h +++ b/arm_compute/core/CL/kernels/CLDepthwiseIm2ColKernel.h @@ -56,8 +56,9 @@ public: * while every dimension above 3 represents a batch. Data types supported: Same as @p input * @param[in] kernel_dims The kernel dimensions (width and height). * @param[in] conv_info Contains padding and stride information described in @ref PadStrideInfo. + * @param[in] has_bias Boolean that specifies if the depthwise convolution has bias. */ - void configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info); + void configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias = false); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; diff --git a/arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h b/arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h index d493d9f052..7989257d34 100644 --- a/arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h +++ b/arm_compute/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.h @@ -52,14 +52,16 @@ public: * * @param[in] input The input tensor to convert. 3 lower dimensions represent a single input [width, height, IFM]. Data type supported: F32. * @param[out] output The output tensor. Data type supported: same as @p input. + * @param[in] biases (Optional) The input biases to add. Shape [IFM]. Data type supported: same as @p input. */ - void configure(const ICLTensor *input, ICLTensor *output); + void configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *biases = nullptr); // Inherited methods overridden: void run(const Window &window, cl::CommandQueue &queue) override; private: const ICLTensor *_input; + const ICLTensor *_biases; ICLTensor *_output; }; } // arm_compute diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseConvolution.h b/arm_compute/runtime/CL/functions/CLDepthwiseConvolution.h index 53bc079cb2..7cf5f64115 100644 --- a/arm_compute/runtime/CL/functions/CLDepthwiseConvolution.h +++ b/arm_compute/runtime/CL/functions/CLDepthwiseConvolution.h @@ -53,10 +53,12 @@ public: * * @param[in, out] input Source tensor. Data type supported: F32. (Written to only for border filling). * @param[out] output Destination tensor. Data type supported: same as @p input. - * @param[in] weights Weights tensor. These are 3D tensors with shape [3, 3, IFM]. Data type supported: Same as @p input. + * @param[in] weights Weights tensor. A 3D tensor with shape [3, 3, IFM]. Data type supported: Same as @p input. + * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input. * @param[in] conv_info Padding and stride information to use for the convolution. */ - void configure(ICLTensor *input, ICLTensor *output, const ICLTensor *weights, const PadStrideInfo &conv_info); + void configure(ICLTensor *input, ICLTensor *output, const ICLTensor *weights, const ICLTensor *biases, const PadStrideInfo &conv_info); // Inherited methods overriden: void run() override; @@ -84,9 +86,11 @@ public: * @param[in, out] input Source tensor. Data type supported: F32. (Written to only for border filling). * @param[out] output Destination tensor. Data type supported: same as @p input. * @param[in] weights Weights tensor. These are 3D tensors with shape [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input. + * @param[in] biases (Optional) Biases tensor. A 1D tensor with shape [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p input. * @param[in] conv_info Padding and stride information to use for the convolution. */ - void configure(ICLTensor *input, ICLTensor *output, const ICLTensor *weights, const PadStrideInfo &conv_info); + void configure(ICLTensor *input, ICLTensor *output, const ICLTensor *weights, const ICLTensor *biases, const PadStrideInfo &conv_info); // Inherited methods overriden: void run() override; diff --git a/arm_compute/runtime/CL/functions/CLDepthwiseSeparableConvolutionLayer.h b/arm_compute/runtime/CL/functions/CLDepthwiseSeparableConvolutionLayer.h index 7dabed1814..a38446293b 100644 --- a/arm_compute/runtime/CL/functions/CLDepthwiseSeparableConvolutionLayer.h +++ b/arm_compute/runtime/CL/functions/CLDepthwiseSeparableConvolutionLayer.h @@ -53,17 +53,20 @@ public: * @param[in] input Source tensor. 3 lower dimensions represent a single input [width, height, IFM], * while every optional dimension from 4 and above represent a batch of inputs. Data types supported: F32. * @param[in] depthwise_weights Depthwise convolution weights tensor. These are 3D tensors with dimensions [kernel_x, kernel_y, IFM]. Data type supported: Same as @p input. + * @param[in] depthwise_biases (Optional) Biases tensor.Biases are 1D tensor with dimensions [IFM]. Must be nullptr if not needed. + * Data type supported: Same as @p weights. * @param[out] depthwise_out Depthwise destination tensor. * @param[in] pointwise_weights Pointwise convolution weights tensor. These are 4D tensors with dimensions [1, 1, IFM, OFM]. Data type supported: Same as @p input. - * @param[in] biases Biases tensor. Shared biases supported. Biases are 1D tensor with dimensions [OFM]. Data type supported: Same as @p weights. + * @param[in] pointwise_biases (Optional) Biases tensor. Biases are 1D tensor with dimensions [OFM]. Must be nullptr if not needed. + * Data type supported: Same as @p weights. * @param[out] output Destination tensor. 3 lower dimensions represent a single output [width, height, OFM], while the rest represent batch of outputs. * Data types supported: Same as @p input. * @param[in] depthwise_conv_info Contains padding and stride information described in @ref PadStrideInfo for depthwise convolution. * @param[in] pointwise_conv_info Contains padding and stride information described in @ref PadStrideInfo for pointwise convolution. */ - void configure(ICLTensor *input, const ICLTensor *depthwise_weights, ICLTensor *depthwise_out, const ICLTensor *pointwise_weights, const ICLTensor *biases, ICLTensor *output, - const PadStrideInfo &depthwise_conv_info, - const PadStrideInfo &pointwise_conv_info); + void configure(ICLTensor *input, const ICLTensor *depthwise_weights, const ICLTensor *depthwise_biases, ICLTensor *depthwise_out, + const ICLTensor *pointwise_weights, const ICLTensor *pointwise_biases, ICLTensor *output, + const PadStrideInfo &depthwise_conv_info, const PadStrideInfo &pointwise_conv_info); // Inherited methods overriden: void run() override; diff --git a/src/core/CL/cl_kernels/depthwise_convolution.cl b/src/core/CL/cl_kernels/depthwise_convolution.cl index 081a4e6c44..411e097dc8 100644 --- a/src/core/CL/cl_kernels/depthwise_convolution.cl +++ b/src/core/CL/cl_kernels/depthwise_convolution.cl @@ -169,14 +169,29 @@ inline float2 convolution3x3( * @param[in] weights_step_y weights_stride_y * number of elements along Y processed per workitem(in bytes) * @param[in] weights_stride_z Stride of the weights tensor in Z dimension (in bytes) * @param[in] weights_step_z weights_stride_z * number of elements along Y processed per workitem(in bytes) - * @param[in] weights_offset_first_element_in_bytes The offset of the first element in the weights tensor + * @param[in] weights_offset_first_element_in_bytes The offset of the first element in the biases vector + * @param[in] biases_ptr (Optional) Pointer to the biases vector. Supported data types: F16/F32 + * @param[in] biases_stride_x (Optional) Stride of the biases vector in X dimension (in bytes) + * @param[in] biases_step_x (Optional) biases_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] biases_offset_first_element_in_bytes (Optional) The offset of the first element in the biases vector */ -__kernel void depthwise_convolution_3x3(TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(dst), TENSOR3D_DECLARATION(weights)) +__kernel void depthwise_convolution_3x3( + TENSOR3D_DECLARATION(src), + TENSOR3D_DECLARATION(dst), + TENSOR3D_DECLARATION(weights) +#if defined(HAS_BIAS) + , + VECTOR_DECLARATION(biases) +#endif //defined(HAS_BIAS) +) { Image src = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(src); Image dst = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(dst); Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT(weights); +#if defined(HAS_BIAS) + Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases); +#endif //defined(HAS_BIAS) uchar3 offset = (uchar3)(0, 1, 2) * (uchar3)weights_stride_y; float3 weights_values0 = vload3(0, (__global float *)(weights.ptr + offset.s0)); @@ -186,6 +201,9 @@ __kernel void depthwise_convolution_3x3(TENSOR3D_DECLARATION(src), TENSOR3D_DECL float2 pixels = convolution3x3(&src, weights_values0.s0, weights_values0.s1, weights_values0.s2, weights_values1.s0, weights_values1.s1, weights_values1.s2, weights_values2.s0, weights_values2.s1, weights_values2.s2); +#if defined(HAS_BIAS) + pixels += (float2)(*((__global float *)(biases.ptr + get_global_id(2) * biases_stride_x))); +#endif //defined(HAS_BIAS) vstore2(pixels, 0, (__global float *)dst.ptr); } @@ -197,24 +215,38 @@ __kernel void depthwise_convolution_3x3(TENSOR3D_DECLARATION(src), TENSOR3D_DECL * * @note Datatype and source width should be given as a preprocessor argument using -DDATA_TYPE=type and -DSRC_WIDTH=width. e.g. -DSRC_WIDTH=128 * - * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32 - * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) - * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) - * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) - * @param[in] src_step_z src_stride_z * number of elements along Y processed per workitem(in bytes) - * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor - * @param[out] dst_ptr Pointer to the destination tensor. Same as @p src_ptr - * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) - * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) - * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) - * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) - * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32 + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes) + * @param[in] src_step_z src_stride_z * number of elements along Y processed per workitem(in bytes) + * @param[in] src_offset_first_element_in_bytes The offset of the first element in the source tensor + * @param[out] dst_ptr Pointer to the destination tensor. Same as @p src_ptr + * @param[in] dst_stride_x Stride of the destination tensor in X dimension (in bytes) + * @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] dst_stride_y Stride of the destination tensor in Y dimension (in bytes) + * @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor + * @param[in] biases_ptr (Optional) Pointer to the biases vector. Supported data types: F16/F32 + * @param[in] biases_stride_x (Optional) Stride of the biases vector in X dimension (in bytes) + * @param[in] biases_step_x (Optional) biases_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] biases_offset_first_element_in_bytes (Optional) The offset of the first element in the biases vector */ -__kernel void depthwise_weights_reshape(TENSOR3D_DECLARATION(src), IMAGE_DECLARATION(dst)) +__kernel void depthwise_weights_reshape( + TENSOR3D_DECLARATION(src), + IMAGE_DECLARATION(dst) +#ifdef HAS_BIAS + , + VECTOR_DECLARATION(biases) +#endif /* HAS_BIAS */ +) { Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src); +#ifdef HAS_BIAS + Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases); +#endif /* HAS_BIAS */ __global DATA_TYPE *input_ptr = (__global DATA_TYPE *)src.ptr; __global uchar *output_ptr = dst_ptr + dst_offset_first_element_in_bytes + get_global_id(1) * SRC_WIDTH * dst_stride_x + get_global_id(2) * dst_stride_y; @@ -223,6 +255,13 @@ __kernel void depthwise_weights_reshape(TENSOR3D_DECLARATION(src), IMAGE_DECLARA { *((__global DATA_TYPE *)(output_ptr + i * dst_stride_x)) = *input_ptr; } + +#if defined(HAS_BIAS) + if(get_global_id(1) == 0) + { + *((__global DATA_TYPE *)(output_ptr + SRC_WIDTH * get_global_size(1) * dst_stride_x)) = *((__global float *)(biases.ptr + get_global_id(2) * biases_stride_x)); + } +#endif // defined(HAS_BIAS) } #endif //defined(SRC_WIDTH) && defined(DATA_TYPE) @@ -279,6 +318,9 @@ __kernel void depthwise_im2col(TENSOR3D_DECLARATION(src), TENSOR3D_DECLARATION(d } } } +#if defined(HAS_BIAS) + *output_ptr = (DATA_TYPE)(1); +#endif // defined(HAS_BIAS) } #endif //defined(STRIDE_X) && defined(STRIDE_Y) && defined(PAD_LEFT) && defined(PAD_TOP) && defined(PAD_RIGHT) && defined(PAD_BOTTOM) && defined(KERNEL_WIDTH) && defined(KERNEL_HEIGHT) && defined(SRC_WIDTH) && defined(DATA_TYPE) diff --git a/src/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.cpp index 6e56835115..2d0c416d0a 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolution3x3Kernel.cpp @@ -37,7 +37,7 @@ using namespace arm_compute; CLDepthwiseConvolution3x3Kernel::CLDepthwiseConvolution3x3Kernel() - : _border_size(0), _input(), _output(), _weights(), _conv_stride_x(0), _conv_stride_y(0), _conv_pad_x(0), _conv_pad_y(0) + : _border_size(0), _input(), _output(), _weights(), _biases(), _conv_stride_x(0), _conv_stride_y(0), _conv_pad_x(0), _conv_pad_y(0) { } @@ -46,13 +46,20 @@ BorderSize CLDepthwiseConvolution3x3Kernel::border_size() const return _border_size; } -void CLDepthwiseConvolution3x3Kernel::configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *weights, const PadStrideInfo &conv_info) +void CLDepthwiseConvolution3x3Kernel::configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *weights, const ICLTensor *biases, const PadStrideInfo &conv_info) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F32); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(weights, 1, DataType::F32); ARM_COMPUTE_ERROR_ON(weights->info()->dimension(0) != 3 || weights->info()->dimension(1) != 3); + if(biases != nullptr) + { + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(weights, biases); + ARM_COMPUTE_ERROR_ON(biases->info()->dimension(0) != weights->info()->dimension(2)); + ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() > 1); + } + std::pair<unsigned int, unsigned int> expected_output = scaled_dimensions(input->info()->tensor_shape().x(), input->info()->tensor_shape().y(), weights->info()->tensor_shape().x(), weights->info()->tensor_shape().y(), conv_info); @@ -64,6 +71,7 @@ void CLDepthwiseConvolution3x3Kernel::configure(const ICLTensor *input, ICLTenso _input = input; _output = output; _weights = weights; + _biases = biases; _conv_stride_x = conv_info.stride().first; _conv_stride_y = conv_info.stride().second; _conv_pad_x = conv_info.pad().first; @@ -73,6 +81,10 @@ void CLDepthwiseConvolution3x3Kernel::configure(const ICLTensor *input, ICLTenso // Set build options ARM_COMPUTE_ERROR_ON(_conv_stride_x < 1 || _conv_stride_x > 3); std::set<std::string> options{ "-DCONV_STRIDE_X=" + support::cpp11::to_string(_conv_stride_x) }; + if(_biases != nullptr) + { + options.emplace("-DHAS_BIAS"); + } _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("depthwise_convolution_3x3", options)); @@ -111,6 +123,15 @@ void CLDepthwiseConvolution3x3Kernel::run(const Window &window, cl::CommandQueue slice_weights.set_dimension_step(Window::DimX, 0); slice_weights.set_dimension_step(Window::DimY, 0); + // Set biases + if(_biases != nullptr) + { + unsigned int idx = 3 * num_arguments_per_3D_tensor(); + Window slice_biases; + slice_biases.use_tensor_dimensions(_biases->info()->tensor_shape()); + add_1D_tensor_argument(idx, _biases, slice_biases); + } + do { unsigned int idx = 0; diff --git a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp index 5c7fe7e0b4..743cd4a38f 100644 --- a/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseIm2ColKernel.cpp @@ -41,13 +41,13 @@ CLDepthwiseIm2ColKernel::CLDepthwiseIm2ColKernel() { } -void CLDepthwiseIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info) +void CLDepthwiseIm2ColKernel::configure(const ICLTensor *input, ICLTensor *output, const Size2D &kernel_dims, const PadStrideInfo &conv_info, bool has_bias) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output); ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) != output->info()->dimension(2)); - ARM_COMPUTE_ERROR_ON(output->info()->dimension(0) != (kernel_dims.width * kernel_dims.height)); + ARM_COMPUTE_ERROR_ON(output->info()->dimension(0) != (kernel_dims.width * kernel_dims.height + ((has_bias) ? 1 : 0))); _input = input; _output = output; @@ -66,7 +66,10 @@ void CLDepthwiseIm2ColKernel::configure(const ICLTensor *input, ICLTensor *outpu build_opts.emplace("-DSRC_HEIGHT=" + support::cpp11::to_string(input->info()->dimension(1))); build_opts.emplace("-DKERNEL_WIDTH=" + support::cpp11::to_string(kernel_dims.width)); build_opts.emplace("-DKERNEL_HEIGHT=" + support::cpp11::to_string(kernel_dims.height)); - + if(has_bias) + { + build_opts.emplace("-DHAS_BIAS"); + } _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("depthwise_im2col", build_opts)); // Configure kernel window diff --git a/src/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.cpp b/src/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.cpp index 68de68b4c5..81dd6b42cc 100644 --- a/src/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseWeightsReshapeKernel.cpp @@ -35,19 +35,28 @@ using namespace arm_compute; CLDepthwiseWeightsReshapeKernel::CLDepthwiseWeightsReshapeKernel() - : _input(nullptr), _output(nullptr) + : _input(nullptr), _biases(nullptr), _output(nullptr) { } -void CLDepthwiseWeightsReshapeKernel::configure(const ICLTensor *input, ICLTensor *output) +void CLDepthwiseWeightsReshapeKernel::configure(const ICLTensor *input, ICLTensor *output, const ICLTensor *biases) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, output); ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, output); ARM_COMPUTE_ERROR_ON(input->info()->dimension(2) != output->info()->dimension(1)); - ARM_COMPUTE_ERROR_ON(output->info()->dimension(0) != input->info()->dimension(0) * input->info()->dimension(1)); + ARM_COMPUTE_ERROR_ON(output->info()->dimension(0) != (input->info()->dimension(0) * input->info()->dimension(1) + ((biases != nullptr) ? 1 : 0))); + + if(biases != nullptr) + { + ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, biases); + ARM_COMPUTE_ERROR_ON_MISMATCHING_FIXED_POINT(input, biases); + ARM_COMPUTE_ERROR_ON(biases->info()->dimension(0) != input->info()->dimension(2)); + ARM_COMPUTE_ERROR_ON(biases->info()->num_dimensions() > 1); + } _input = input; + _biases = biases; _output = output; // Create kernel @@ -55,6 +64,10 @@ void CLDepthwiseWeightsReshapeKernel::configure(const ICLTensor *input, ICLTenso build_opts.emplace("-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); build_opts.emplace("-DSRC_WIDTH=" + support::cpp11::to_string(input->info()->dimension(0))); + if(_biases != nullptr) + { + build_opts.emplace("-DHAS_BIAS"); + } _kernel = static_cast<cl::Kernel>(CLKernelLibrary::get().create_kernel("depthwise_weights_reshape", build_opts)); @@ -84,6 +97,15 @@ void CLDepthwiseWeightsReshapeKernel::run(const Window &window, cl::CommandQueue slice_out.set(Window::DimX, Window::Dimension(0, 0, 0)); slice_out.set(Window::DimY, Window::Dimension(0, 0, 0)); + // Set biases + if(_biases != nullptr) + { + unsigned int idx = num_arguments_per_3D_tensor() + num_arguments_per_2D_tensor(); + Window slice_biases; + slice_biases.use_tensor_dimensions(_biases->info()->tensor_shape()); + add_1D_tensor_argument(idx, _biases, slice_biases); + } + do { unsigned int idx = 0; diff --git a/src/runtime/CL/functions/CLDepthwiseConvolution.cpp b/src/runtime/CL/functions/CLDepthwiseConvolution.cpp index 22c037fc2a..ffd9f40f4f 100644 --- a/src/runtime/CL/functions/CLDepthwiseConvolution.cpp +++ b/src/runtime/CL/functions/CLDepthwiseConvolution.cpp @@ -35,13 +35,13 @@ CLDepthwiseConvolution3x3::CLDepthwiseConvolution3x3() { } -void CLDepthwiseConvolution3x3::configure(ICLTensor *input, ICLTensor *output, const ICLTensor *weights, const PadStrideInfo &conv_info) +void CLDepthwiseConvolution3x3::configure(ICLTensor *input, ICLTensor *output, const ICLTensor *weights, const ICLTensor *biases, const PadStrideInfo &conv_info) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F32); ARM_COMPUTE_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); - _kernel.configure(input, output, weights, conv_info); + _kernel.configure(input, output, weights, biases, conv_info); _border_handler.configure(input, _kernel.border_size(), BorderMode::CONSTANT, PixelValue(0)); } @@ -57,7 +57,7 @@ CLDepthwiseConvolution::CLDepthwiseConvolution() { } -void CLDepthwiseConvolution::configure(ICLTensor *input, ICLTensor *output, const ICLTensor *weights, const PadStrideInfo &conv_info) +void CLDepthwiseConvolution::configure(ICLTensor *input, ICLTensor *output, const ICLTensor *weights, const ICLTensor *biases, const PadStrideInfo &conv_info) { ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F32); ARM_COMPUTE_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(output, 1, DataType::F32); @@ -68,12 +68,14 @@ void CLDepthwiseConvolution::configure(ICLTensor *input, ICLTensor *output, cons const size_t weights_h = weights->info()->dimension(1); const size_t weights_z = weights->info()->dimension(2); + bool has_bias = (biases != nullptr); + unsigned int conv_w = 0; unsigned int conv_h = 0; std::tie(conv_w, conv_h) = scaled_dimensions(input->info()->dimension(0), input->info()->dimension(1), weights_w, weights_h, conv_info); // Set up intermediate tensors - const size_t patch_size = weights_w * weights_h; + const size_t patch_size = weights_w * weights_h + ((has_bias) ? 1 : 0); const size_t conv_size = conv_w * conv_h; TensorShape shape_im2col = input->info()->tensor_shape(); @@ -96,8 +98,8 @@ void CLDepthwiseConvolution::configure(ICLTensor *input, ICLTensor *output, cons _v2mm_output.allocator()->init(info_v2mm_out); // Configure kernels - _im2col_kernel.configure(input, &_input_reshaped, Size2D(weights_w, weights_h), conv_info); - _weights_reshape_kernel.configure(weights, &_weights_reshaped); + _im2col_kernel.configure(input, &_input_reshaped, Size2D(weights_w, weights_h), conv_info, has_bias); + _weights_reshape_kernel.configure(weights, &_weights_reshaped, biases); _v2mm_kernel.configure(&_input_reshaped, &_weights_reshaped, &_v2mm_output); _vector_to_tensor_kernel.configure(&_v2mm_output, output, conv_w, conv_h); diff --git a/src/runtime/CL/functions/CLDepthwiseSeparableConvolutionLayer.cpp b/src/runtime/CL/functions/CLDepthwiseSeparableConvolutionLayer.cpp index c325b3e01f..14ab808dbf 100644 --- a/src/runtime/CL/functions/CLDepthwiseSeparableConvolutionLayer.cpp +++ b/src/runtime/CL/functions/CLDepthwiseSeparableConvolutionLayer.cpp @@ -35,12 +35,12 @@ CLDepthwiseSeparableConvolutionLayer::CLDepthwiseSeparableConvolutionLayer() { } -void CLDepthwiseSeparableConvolutionLayer::configure(ICLTensor *input, const ICLTensor *depthwise_weights, ICLTensor *depthwise_out, const ICLTensor *pointwise_weights, const ICLTensor *biases, - ICLTensor *output, +void CLDepthwiseSeparableConvolutionLayer::configure(ICLTensor *input, const ICLTensor *depthwise_weights, const ICLTensor *depthwise_biases, ICLTensor *depthwise_out, + const ICLTensor *pointwise_weights, const ICLTensor *pointwise_biases, ICLTensor *output, const PadStrideInfo &depthwise_conv_info, const PadStrideInfo &pointwise_conv_info) { - _depthwise_conv.configure(input, depthwise_out, depthwise_weights, depthwise_conv_info); - _pointwise_conv.configure(depthwise_out, pointwise_weights, biases, output, pointwise_conv_info); + _depthwise_conv.configure(input, depthwise_out, depthwise_weights, depthwise_biases, depthwise_conv_info); + _pointwise_conv.configure(depthwise_out, pointwise_weights, pointwise_biases, output, pointwise_conv_info); } void CLDepthwiseSeparableConvolutionLayer::run() diff --git a/tests/benchmark/fixtures/DepthwiseConvolutionFixture.h b/tests/benchmark/fixtures/DepthwiseConvolutionFixture.h index e080fe253b..0095509cf7 100644 --- a/tests/benchmark/fixtures/DepthwiseConvolutionFixture.h +++ b/tests/benchmark/fixtures/DepthwiseConvolutionFixture.h @@ -40,7 +40,7 @@ class DepthwiseConvolutionFixture : public framework::Fixture { public: template <typename...> - void setup(TensorShape src_shape, TensorShape weights_shape, TensorShape dst_shape, PadStrideInfo info, DataType data_type, int batches) + void setup(TensorShape src_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape dst_shape, PadStrideInfo info, DataType data_type, int batches) { // Set batched in source and destination shapes const unsigned int fixed_point_position = 4; @@ -50,14 +50,16 @@ public: // Create tensors src = create_tensor<TensorType>(src_shape, data_type, 1, fixed_point_position); weights = create_tensor<TensorType>(weights_shape, data_type, 1, fixed_point_position); + biases = create_tensor<TensorType>(biases_shape, data_type, 1, fixed_point_position); dst = create_tensor<TensorType>(dst_shape, data_type, 1, fixed_point_position); // Create and configure function - depth_conv.configure(&src, &dst, &weights, info); + depth_conv.configure(&src, &dst, &weights, &biases, info); // Allocate tensors src.allocator()->allocate(); weights.allocator()->allocate(); + biases.allocator()->allocate(); dst.allocator()->allocate(); // Fill tensors @@ -74,12 +76,14 @@ public: { src.allocator()->free(); weights.allocator()->free(); + biases.allocator()->free(); dst.allocator()->free(); } private: TensorType src{}; TensorType weights{}; + TensorType biases{}; TensorType dst{}; Function depth_conv{}; }; diff --git a/tests/benchmark/fixtures/DepthwiseSeparableConvolutionLayerFixture.h b/tests/benchmark/fixtures/DepthwiseSeparableConvolutionLayerFixture.h index 928b0c7328..fa1544042f 100644 --- a/tests/benchmark/fixtures/DepthwiseSeparableConvolutionLayerFixture.h +++ b/tests/benchmark/fixtures/DepthwiseSeparableConvolutionLayerFixture.h @@ -40,7 +40,8 @@ class DepthwiseSeparableConvolutionLayerFixture : public framework::Fixture { public: template <typename...> - void setup(TensorShape src_shape, TensorShape depthwise_weights_shape, TensorShape depthwise_out_shape, TensorShape pointwise_weights_shape, TensorShape biases_shape, TensorShape dst_shape, + void setup(TensorShape src_shape, TensorShape depthwise_weights_shape, TensorShape depthwise_biases_shape, TensorShape depthwise_out_shape, TensorShape pointwise_weights_shape, + TensorShape pointwise_biases_shape, TensorShape dst_shape, PadStrideInfo pad_stride_depthwise_info, PadStrideInfo pad_stride_pointwise_info, DataType data_type, int batches) { // Set batched in source and destination shapes @@ -51,27 +52,30 @@ public: src = create_tensor<TensorType>(src_shape, data_type, 1, fixed_point_position); depthwise_weights = create_tensor<TensorType>(depthwise_weights_shape, data_type, 1, fixed_point_position); + depthwise_biases = create_tensor<TensorType>(depthwise_biases_shape, data_type, 1, fixed_point_position); depthwise_out = create_tensor<TensorType>(depthwise_out_shape, data_type, 1, fixed_point_position); pointwise_weights = create_tensor<TensorType>(pointwise_weights_shape, data_type, 1, fixed_point_position); - biases = create_tensor<TensorType>(biases_shape, data_type, 1, fixed_point_position); + pointwise_biases = create_tensor<TensorType>(pointwise_biases_shape, data_type, 1, fixed_point_position); dst = create_tensor<TensorType>(dst_shape, data_type, 1, fixed_point_position); // Create and configure function - depth_sep_conv_layer.configure(&src, &depthwise_weights, &depthwise_out, &pointwise_weights, &biases, &dst, pad_stride_depthwise_info, pad_stride_pointwise_info); + depth_sep_conv_layer.configure(&src, &depthwise_weights, &depthwise_biases, &depthwise_out, &pointwise_weights, &pointwise_biases, &dst, pad_stride_depthwise_info, pad_stride_pointwise_info); // Allocate tensors src.allocator()->allocate(); depthwise_weights.allocator()->allocate(); + depthwise_biases.allocator()->allocate(); depthwise_out.allocator()->allocate(); pointwise_weights.allocator()->allocate(); - biases.allocator()->allocate(); + pointwise_biases.allocator()->allocate(); dst.allocator()->allocate(); // Fill tensors library->fill_tensor_uniform(Accessor(src), 0); library->fill_tensor_uniform(Accessor(depthwise_weights), 1); - library->fill_tensor_uniform(Accessor(pointwise_weights), 2); - library->fill_tensor_uniform(Accessor(biases), 3); + library->fill_tensor_uniform(Accessor(depthwise_biases), 2); + library->fill_tensor_uniform(Accessor(pointwise_weights), 3); + library->fill_tensor_uniform(Accessor(pointwise_biases), 4); } void run() @@ -83,18 +87,20 @@ public: { src.allocator()->free(); depthwise_weights.allocator()->free(); + depthwise_biases.allocator()->free(); depthwise_out.allocator()->free(); pointwise_weights.allocator()->free(); - biases.allocator()->free(); + pointwise_biases.allocator()->free(); dst.allocator()->free(); } private: TensorType src{}; TensorType depthwise_weights{}; + TensorType depthwise_biases{}; TensorType depthwise_out{}; TensorType pointwise_weights{}; - TensorType biases{}; + TensorType pointwise_biases{}; TensorType dst{}; Function depth_sep_conv_layer{}; }; diff --git a/tests/datasets/DepthwiseConvolutionDataset.h b/tests/datasets/DepthwiseConvolutionDataset.h index 8cceae0083..430d2c9aca 100644 --- a/tests/datasets/DepthwiseConvolutionDataset.h +++ b/tests/datasets/DepthwiseConvolutionDataset.h @@ -38,16 +38,18 @@ namespace datasets class DepthwiseConvolutionDataset { public: - using type = std::tuple<TensorShape, TensorShape, TensorShape, PadStrideInfo>; + using type = std::tuple<TensorShape, TensorShape, TensorShape, TensorShape, PadStrideInfo>; struct iterator { iterator(std::vector<TensorShape>::const_iterator src_it, std::vector<TensorShape>::const_iterator weights_it, + std::vector<TensorShape>::const_iterator biases_it, std::vector<TensorShape>::const_iterator dst_it, std::vector<PadStrideInfo>::const_iterator infos_it) : _src_it{ std::move(src_it) }, _weights_it{ std::move(weights_it) }, + _biases_it{ std::move(biases_it) }, _dst_it{ std::move(dst_it) }, _infos_it{ std::move(infos_it) } { @@ -58,6 +60,7 @@ public: std::stringstream description; description << "In=" << *_src_it << ":"; description << "Weights=" << *_weights_it << ":"; + description << "Biases=" << *_biases_it << ":"; description << "Out=" << *_dst_it << ":"; description << "Info=" << *_infos_it; return description.str(); @@ -65,13 +68,14 @@ public: DepthwiseConvolutionDataset::type operator*() const { - return std::make_tuple(*_src_it, *_weights_it, *_dst_it, *_infos_it); + return std::make_tuple(*_src_it, *_weights_it, *_biases_it, *_dst_it, *_infos_it); } iterator &operator++() { ++_src_it; ++_weights_it; + ++_biases_it; ++_dst_it; ++_infos_it; @@ -81,24 +85,26 @@ public: private: std::vector<TensorShape>::const_iterator _src_it; std::vector<TensorShape>::const_iterator _weights_it; + std::vector<TensorShape>::const_iterator _biases_it; std::vector<TensorShape>::const_iterator _dst_it; std::vector<PadStrideInfo>::const_iterator _infos_it; }; iterator begin() const { - return iterator(_src_shapes.begin(), _weight_shapes.begin(), _dst_shapes.begin(), _infos.begin()); + return iterator(_src_shapes.begin(), _weight_shapes.begin(), _biases_shapes.begin(), _dst_shapes.begin(), _infos.begin()); } int size() const { - return std::min(_src_shapes.size(), std::min(_weight_shapes.size(), std::min(_dst_shapes.size(), _infos.size()))); + return std::min(_src_shapes.size(), std::min(_weight_shapes.size(), std::min(_biases_shapes.size(), std::min(_dst_shapes.size(), _infos.size())))); } - void add_config(TensorShape src, TensorShape weights, TensorShape dst, PadStrideInfo info) + void add_config(TensorShape src, TensorShape weights, TensorShape biases, TensorShape dst, PadStrideInfo info) { _src_shapes.emplace_back(std::move(src)); _weight_shapes.emplace_back(std::move(weights)); + _biases_shapes.emplace_back(std::move(biases)); _dst_shapes.emplace_back(std::move(dst)); _infos.emplace_back(std::move(info)); } @@ -110,6 +116,7 @@ protected: private: std::vector<TensorShape> _src_shapes{}; std::vector<TensorShape> _weight_shapes{}; + std::vector<TensorShape> _biases_shapes{}; std::vector<TensorShape> _dst_shapes{}; std::vector<PadStrideInfo> _infos{}; }; @@ -118,20 +125,20 @@ class SmallDepthwiseConvolutionDataset final : public DepthwiseConvolutionDatase public: SmallDepthwiseConvolutionDataset() { - add_config(TensorShape(7U, 7U, 3U), TensorShape(3U, 3U, 3U), TensorShape(5U, 5U, 3U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(23U, 27U, 5U), TensorShape(3U, 5U, 5U), TensorShape(11U, 23U, 5U), PadStrideInfo(2, 1, 0, 0)); - add_config(TensorShape(33U, 27U, 7U), TensorShape(7U, 3U, 7U), TensorShape(10U, 13U, 7U), PadStrideInfo(3, 2, 1, 0)); - add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(31U, 14U, 11U), PadStrideInfo(1, 2, 0, 1)); - add_config(TensorShape(17U, 31U, 2U), TensorShape(5U, 9U, 2U), TensorShape(15U, 13U, 2U), PadStrideInfo(1, 2, 1, 1)); - add_config(TensorShape(23U, 27U, 5U), TensorShape(11U, 3U, 5U), TensorShape(13U, 13U, 5U), PadStrideInfo(1, 2, 0, 0)); - add_config(TensorShape(17U, 31U, 2U, 3U), TensorShape(5U, 9U, 2U), TensorShape(15U, 13U, 2U, 3U), PadStrideInfo(1, 2, 1, 1)); + add_config(TensorShape(7U, 7U, 3U), TensorShape(3U, 3U, 3U), TensorShape(3U), TensorShape(5U, 5U, 3U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(23U, 27U, 5U), TensorShape(3U, 5U, 5U), TensorShape(5U), TensorShape(11U, 23U, 5U), PadStrideInfo(2, 1, 0, 0)); + add_config(TensorShape(33U, 27U, 7U), TensorShape(7U, 3U, 7U), TensorShape(7U), TensorShape(10U, 13U, 7U), PadStrideInfo(3, 2, 1, 0)); + add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(11U), TensorShape(31U, 14U, 11U), PadStrideInfo(1, 2, 0, 1)); + add_config(TensorShape(17U, 31U, 2U), TensorShape(5U, 9U, 2U), TensorShape(2U), TensorShape(15U, 13U, 2U), PadStrideInfo(1, 2, 1, 1)); + add_config(TensorShape(23U, 27U, 5U), TensorShape(11U, 3U, 5U), TensorShape(5U), TensorShape(13U, 13U, 5U), PadStrideInfo(1, 2, 0, 0)); + add_config(TensorShape(17U, 31U, 2U, 3U), TensorShape(5U, 9U, 2U), TensorShape(2U), TensorShape(15U, 13U, 2U, 3U), PadStrideInfo(1, 2, 1, 1)); // Asymmetric padding - add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 1, 1, 2, 0, DimensionRoundingType::FLOOR)); - add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 1, 1, 0, 2, DimensionRoundingType::FLOOR)); - add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 2, 1, 2, 0, DimensionRoundingType::FLOOR)); - add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 1, 3, 0, 2, DimensionRoundingType::FLOOR)); - add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(10U, 11U, 7U), PadStrideInfo(3, 2, 1, 0, 1, 0, DimensionRoundingType::FLOOR)); - add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(10U, 11U, 7U), PadStrideInfo(3, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 1, 1, 2, 0, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 1, 1, 0, 2, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 2, 1, 2, 0, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(11U, 12U, 7U), PadStrideInfo(3, 2, 1, 3, 0, 2, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(10U, 11U, 7U), PadStrideInfo(3, 2, 1, 0, 1, 0, DimensionRoundingType::FLOOR)); + add_config(TensorShape(33U, 27U, 7U), TensorShape(5U, 7U, 7U), TensorShape(7U), TensorShape(10U, 11U, 7U), PadStrideInfo(3, 2, 0, 1, 0, 1, DimensionRoundingType::FLOOR)); } }; @@ -140,12 +147,12 @@ class LargeDepthwiseConvolutionDataset final : public DepthwiseConvolutionDatase public: LargeDepthwiseConvolutionDataset() { - add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(116U, 275U, 55U), PadStrideInfo(2, 1, 0, 0)); - add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(111U, 138U, 77U), PadStrideInfo(3, 2, 1, 0)); - add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(177U, 156U, 22U), PadStrideInfo(1, 2, 1, 1)); - add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(231U, 138U, 55U), PadStrideInfo(1, 2, 0, 0)); - add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(166U, 93U, 77U), PadStrideInfo(2, 3, 0, 1)); - add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(89U, 311U, 22U), PadStrideInfo(2, 1, 1, 1)); + add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(55U), TensorShape(116U, 275U, 55U), PadStrideInfo(2, 1, 0, 0)); + add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(77U), TensorShape(111U, 138U, 77U), PadStrideInfo(3, 2, 1, 0)); + add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(22U), TensorShape(177U, 156U, 22U), PadStrideInfo(1, 2, 1, 1)); + add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(55U), TensorShape(231U, 138U, 55U), PadStrideInfo(1, 2, 0, 0)); + add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(77U), TensorShape(166U, 93U, 77U), PadStrideInfo(2, 3, 0, 1)); + add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(22U), TensorShape(89U, 311U, 22U), PadStrideInfo(2, 1, 1, 1)); } }; @@ -154,10 +161,10 @@ class SmallDepthwiseConvolutionDataset3x3 final : public DepthwiseConvolutionDat public: SmallDepthwiseConvolutionDataset3x3() { - add_config(TensorShape(7U, 7U, 3U), TensorShape(3U, 3U, 3U), TensorShape(5U, 5U, 3U), PadStrideInfo(1, 1, 0, 0)); - add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(11U, 14U, 11U), PadStrideInfo(3, 2, 1, 1)); - add_config(TensorShape(21U, 31U, 9U), TensorShape(3U, 3U, 9U), TensorShape(21U, 15U, 9U), PadStrideInfo(1, 2, 1, 0)); - add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(31U, 14U, 11U), PadStrideInfo(1, 2, 0, 1)); + add_config(TensorShape(7U, 7U, 3U), TensorShape(3U, 3U, 3U), TensorShape(3U), TensorShape(5U, 5U, 3U), PadStrideInfo(1, 1, 0, 0)); + add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(11U), TensorShape(11U, 14U, 11U), PadStrideInfo(3, 2, 1, 1)); + add_config(TensorShape(21U, 31U, 9U), TensorShape(3U, 3U, 9U), TensorShape(9U), TensorShape(21U, 15U, 9U), PadStrideInfo(1, 2, 1, 0)); + add_config(TensorShape(33U, 27U, 11U), TensorShape(3U, 3U, 11U), TensorShape(11U), TensorShape(31U, 14U, 11U), PadStrideInfo(1, 2, 0, 1)); } }; @@ -166,12 +173,12 @@ class LargeDepthwiseConvolutionDataset3x3 final : public DepthwiseConvolutionDat public: LargeDepthwiseConvolutionDataset3x3() { - add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(116U, 275U, 55U), PadStrideInfo(2, 1, 0, 0)); - add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(111U, 138U, 77U), PadStrideInfo(3, 2, 1, 0)); - add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(177U, 156U, 22U), PadStrideInfo(1, 2, 1, 1)); - add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(231U, 138U, 55U), PadStrideInfo(1, 2, 0, 0)); - add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(166U, 93U, 77U), PadStrideInfo(2, 3, 0, 1)); - add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(89U, 311U, 22U), PadStrideInfo(2, 1, 1, 1)); + add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(55U), TensorShape(116U, 275U, 55U), PadStrideInfo(2, 1, 0, 0)); + add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(77U), TensorShape(111U, 138U, 77U), PadStrideInfo(3, 2, 1, 0)); + add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(22U), TensorShape(177U, 156U, 22U), PadStrideInfo(1, 2, 1, 1)); + add_config(TensorShape(233U, 277U, 55U), TensorShape(3U, 3U, 55U), TensorShape(55U), TensorShape(231U, 138U, 55U), PadStrideInfo(1, 2, 0, 0)); + add_config(TensorShape(333U, 277U, 77U), TensorShape(3U, 3U, 77U), TensorShape(77U), TensorShape(166U, 93U, 77U), PadStrideInfo(2, 3, 0, 1)); + add_config(TensorShape(177U, 311U, 22U), TensorShape(3U, 3U, 22U), TensorShape(22U), TensorShape(89U, 311U, 22U), PadStrideInfo(2, 1, 1, 1)); } }; } // namespace datasets diff --git a/tests/datasets/DepthwiseSeparableConvolutionLayerDataset.h b/tests/datasets/DepthwiseSeparableConvolutionLayerDataset.h index 6b39d3a5c8..efc0cbd27e 100644 --- a/tests/datasets/DepthwiseSeparableConvolutionLayerDataset.h +++ b/tests/datasets/DepthwiseSeparableConvolutionLayerDataset.h @@ -38,12 +38,13 @@ namespace datasets class DepthwiseSeparableConvolutionLayerDataset { public: - using type = std::tuple<TensorShape, TensorShape, TensorShape, TensorShape, TensorShape, TensorShape, PadStrideInfo, PadStrideInfo>; + using type = std::tuple<TensorShape, TensorShape, TensorShape, TensorShape, TensorShape, TensorShape, TensorShape, PadStrideInfo, PadStrideInfo>; struct iterator { iterator(std::vector<TensorShape>::const_iterator src_it, std::vector<TensorShape>::const_iterator filter_it, + std::vector<TensorShape>::const_iterator filter_biases_it, std::vector<TensorShape>::const_iterator depthwise_out_it, std::vector<TensorShape>::const_iterator weights_it, std::vector<TensorShape>::const_iterator biases_it, @@ -52,6 +53,7 @@ public: std::vector<PadStrideInfo>::const_iterator pointwise_infos_it) : _src_it{ std::move(src_it) }, _filter_it{ std::move(filter_it) }, + _filter_biases_it{ std::move(filter_biases_it) }, _depthwise_out_it{ std::move(depthwise_out_it) }, _weights_it{ std::move(weights_it) }, _biases_it{ std::move(biases_it) }, @@ -66,6 +68,7 @@ public: std::stringstream description; description << "In=" << *_src_it << ":"; description << "Filter=" << *_filter_it << ":"; + description << "FilterBiases=" << *_filter_biases_it << ":"; description << "DepthwiseOut=" << *_depthwise_out_it << ":"; description << "Weights=" << *_weights_it << ":"; description << "Biases=" << *_biases_it << ":"; @@ -77,13 +80,14 @@ public: DepthwiseSeparableConvolutionLayerDataset::type operator*() const { - return std::make_tuple(*_src_it, *_filter_it, *_depthwise_out_it, *_weights_it, *_biases_it, *_dst_it, *_depthwise_infos_it, *_pointwise_infos_it); + return std::make_tuple(*_src_it, *_filter_it, *_filter_biases_it, *_depthwise_out_it, *_weights_it, *_biases_it, *_dst_it, *_depthwise_infos_it, *_pointwise_infos_it); } iterator &operator++() { ++_src_it; ++_filter_it; + ++_filter_biases_it; ++_depthwise_out_it; ++_weights_it; ++_biases_it; @@ -97,6 +101,7 @@ public: private: std::vector<TensorShape>::const_iterator _src_it; std::vector<TensorShape>::const_iterator _filter_it; + std::vector<TensorShape>::const_iterator _filter_biases_it; std::vector<TensorShape>::const_iterator _depthwise_out_it; std::vector<TensorShape>::const_iterator _weights_it; std::vector<TensorShape>::const_iterator _biases_it; @@ -107,20 +112,24 @@ public: iterator begin() const { - return iterator(_src_shapes.begin(), _filter_shapes.begin(), _depthwise_out_shapes.begin(), _weight_shapes.begin(), _bias_shapes.begin(), _dst_shapes.begin(), _depthwise_infos.begin(), + return iterator(_src_shapes.begin(), _filter_shapes.begin(), _filter_biases_shapes.begin(), _depthwise_out_shapes.begin(), _weight_shapes.begin(), _bias_shapes.begin(), _dst_shapes.begin(), + _depthwise_infos.begin(), _pointwise_infos.begin()); } int size() const { - return std::min(_src_shapes.size(), std::min(_filter_shapes.size(), std::min(_depthwise_out_shapes.size(), std::min(_weight_shapes.size(), std::min(_bias_shapes.size(), std::min(_dst_shapes.size(), - std::min(_depthwise_infos.size(), _pointwise_infos.size()))))))); + return std::min(_src_shapes.size(), std::min(_filter_shapes.size(), std::min(_filter_biases_shapes.size(), std::min(_depthwise_out_shapes.size(), std::min(_weight_shapes.size(), + std::min(_bias_shapes.size(), std::min(_dst_shapes.size(), + std::min(_depthwise_infos.size(), _pointwise_infos.size())))))))); } - void add_config(TensorShape src, TensorShape filter, TensorShape depthwise_out, TensorShape weights, TensorShape biases, TensorShape dst, PadStrideInfo depthwise_info, PadStrideInfo pointwise_info) + void add_config(TensorShape src, TensorShape filter, TensorShape filter_bias, TensorShape depthwise_out, TensorShape weights, TensorShape biases, TensorShape dst, PadStrideInfo depthwise_info, + PadStrideInfo pointwise_info) { _src_shapes.emplace_back(std::move(src)); _filter_shapes.emplace_back(std::move(filter)); + _filter_biases_shapes.emplace_back(std::move(filter_bias)); _depthwise_out_shapes.emplace_back(std::move(depthwise_out)); _weight_shapes.emplace_back(std::move(weights)); _bias_shapes.emplace_back(std::move(biases)); @@ -136,6 +145,7 @@ protected: private: std::vector<TensorShape> _src_shapes{}; std::vector<TensorShape> _filter_shapes{}; + std::vector<TensorShape> _filter_biases_shapes{}; std::vector<TensorShape> _depthwise_out_shapes{}; std::vector<TensorShape> _weight_shapes{}; std::vector<TensorShape> _bias_shapes{}; diff --git a/tests/datasets/MobileNetDepthwiseConvolutionDataset.h b/tests/datasets/MobileNetDepthwiseConvolutionDataset.h index c9d98d4185..c2690d9ad6 100644 --- a/tests/datasets/MobileNetDepthwiseConvolutionDataset.h +++ b/tests/datasets/MobileNetDepthwiseConvolutionDataset.h @@ -42,14 +42,14 @@ class MobileNetDepthwiseConvolutionDataset final : public DepthwiseConvolutionDa public: MobileNetDepthwiseConvolutionDataset() { - add_config(TensorShape(7U, 7U, 1024U), TensorShape(3U, 3U, 1024U), TensorShape(3U, 3U, 1024U), PadStrideInfo(2, 2, 1, 1)); - add_config(TensorShape(14U, 14U, 512U), TensorShape(3U, 3U, 512U), TensorShape(7U, 7U, 512U), PadStrideInfo(2, 2, 1, 1)); - add_config(TensorShape(28U, 28U, 256U), TensorShape(3U, 3U, 256U), TensorShape(14U, 14U, 256U), PadStrideInfo(2, 2, 1, 1)); - add_config(TensorShape(28U, 28U, 256U), TensorShape(3U, 3U, 256U), TensorShape(28U, 28U, 256U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(56U, 56U, 128U), TensorShape(3U, 3U, 128U), TensorShape(28U, 28U, 128U), PadStrideInfo(2, 2, 1, 1)); - add_config(TensorShape(56U, 56U, 128U), TensorShape(3U, 3U, 128U), TensorShape(56U, 56U, 128U), PadStrideInfo(1, 1, 1, 1)); - add_config(TensorShape(112U, 112U, 64U), TensorShape(3U, 3U, 64U), TensorShape(56U, 56U, 64U), PadStrideInfo(2, 2, 1, 1)); - add_config(TensorShape(112U, 112U, 32U), TensorShape(3U, 3U, 32U), TensorShape(112U, 112U, 32U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(7U, 7U, 1024U), TensorShape(3U, 3U, 1024U), TensorShape(1024U), TensorShape(3U, 3U, 1024U), PadStrideInfo(2, 2, 1, 1)); + add_config(TensorShape(14U, 14U, 512U), TensorShape(3U, 3U, 512U), TensorShape(512U), TensorShape(7U, 7U, 512U), PadStrideInfo(2, 2, 1, 1)); + add_config(TensorShape(28U, 28U, 256U), TensorShape(3U, 3U, 256U), TensorShape(256U), TensorShape(14U, 14U, 256U), PadStrideInfo(2, 2, 1, 1)); + add_config(TensorShape(28U, 28U, 256U), TensorShape(3U, 3U, 256U), TensorShape(256U), TensorShape(28U, 28U, 256U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(56U, 56U, 128U), TensorShape(3U, 3U, 128U), TensorShape(128U), TensorShape(28U, 28U, 128U), PadStrideInfo(2, 2, 1, 1)); + add_config(TensorShape(56U, 56U, 128U), TensorShape(3U, 3U, 128U), TensorShape(128U), TensorShape(56U, 56U, 128U), PadStrideInfo(1, 1, 1, 1)); + add_config(TensorShape(112U, 112U, 64U), TensorShape(3U, 3U, 64U), TensorShape(64U), TensorShape(56U, 56U, 64U), PadStrideInfo(2, 2, 1, 1)); + add_config(TensorShape(112U, 112U, 32U), TensorShape(3U, 3U, 32U), TensorShape(32U), TensorShape(112U, 112U, 32U), PadStrideInfo(1, 1, 1, 1)); } }; } // namespace datasets diff --git a/tests/datasets/MobileNetDepthwiseSeparableConvolutionLayerDataset.h b/tests/datasets/MobileNetDepthwiseSeparableConvolutionLayerDataset.h index e61578818a..d0f602daf0 100644 --- a/tests/datasets/MobileNetDepthwiseSeparableConvolutionLayerDataset.h +++ b/tests/datasets/MobileNetDepthwiseSeparableConvolutionLayerDataset.h @@ -42,9 +42,10 @@ class MobileNetDepthwiseSeparableConvolutionLayerDataset final : public Depthwis public: MobileNetDepthwiseSeparableConvolutionLayerDataset() { - add_config(TensorShape(14U, 14U, 512U), TensorShape(3U, 3U, 512U), TensorShape(14U, 14U, 512U), TensorShape(1U, 1U, 512U, 512U), TensorShape(512U), TensorShape(14U, 14U, 512U), PadStrideInfo(1, 1, 1, - 1, - DimensionRoundingType::FLOOR), + add_config(TensorShape(14U, 14U, 512U), TensorShape(3U, 3U, 512U), TensorShape(512U), TensorShape(14U, 14U, 512U), TensorShape(1U, 1U, 512U, 512U), TensorShape(512U), TensorShape(14U, 14U, 512U), + PadStrideInfo(1, 1, 1, + 1, + DimensionRoundingType::FLOOR), PadStrideInfo(1, 1, 0, 0, DimensionRoundingType::FLOOR)); } }; diff --git a/tests/validation/CPP/DepthwiseConvolution.cpp b/tests/validation/CPP/DepthwiseConvolution.cpp index b57c2686f6..e29d014f77 100644 --- a/tests/validation/CPP/DepthwiseConvolution.cpp +++ b/tests/validation/CPP/DepthwiseConvolution.cpp @@ -45,7 +45,7 @@ namespace reference * */ template <typename T> -SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const TensorShape &dst_shape, const PadStrideInfo &conv_info) +SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<T> &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info) { // Create reference SimpleTensor<T> dst{ dst_shape, src.data_type(), 1, src.fixed_point_position() }; @@ -97,7 +97,7 @@ SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTe } coords.set(0, x); coords.set(1, y); - dst[out_pos++] = saturate_cast<T>(val); + dst[out_pos++] = saturate_cast<T>(val + *static_cast<const T *>(biases(Coordinates(z)))); } } } @@ -106,7 +106,8 @@ SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTe return dst; } -template SimpleTensor<float> depthwise_convolution(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const TensorShape &dst_shape, const PadStrideInfo &conv_info); +template SimpleTensor<float> depthwise_convolution(const SimpleTensor<float> &src, const SimpleTensor<float> &weights, const SimpleTensor<float> &biases, const TensorShape &dst_shape, + const PadStrideInfo &conv_info); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/CPP/DepthwiseConvolution.h b/tests/validation/CPP/DepthwiseConvolution.h index 6be80fc07f..e8c55b16a8 100644 --- a/tests/validation/CPP/DepthwiseConvolution.h +++ b/tests/validation/CPP/DepthwiseConvolution.h @@ -36,7 +36,7 @@ namespace validation namespace reference { template <typename T> -SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const TensorShape &dst_shape, const PadStrideInfo &conv_info); +SimpleTensor<T> depthwise_convolution(const SimpleTensor<T> &src, const SimpleTensor<T> &weights, const SimpleTensor<T> &biases, const TensorShape &dst_shape, const PadStrideInfo &conv_info); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/CPP/DepthwiseSeparableConvolutionLayer.cpp b/tests/validation/CPP/DepthwiseSeparableConvolutionLayer.cpp index 3942ecf02a..8c8e50d349 100644 --- a/tests/validation/CPP/DepthwiseSeparableConvolutionLayer.cpp +++ b/tests/validation/CPP/DepthwiseSeparableConvolutionLayer.cpp @@ -40,19 +40,22 @@ namespace reference { // Depthwise separable convolution layer template <typename T> -SimpleTensor<T> depthwise_separable_convolution_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &depthwise_weights, const TensorShape &depthwise_out_shape, +SimpleTensor<T> depthwise_separable_convolution_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &depthwise_weights, const SimpleTensor<T> &depthwise_biases, + const TensorShape &depthwise_out_shape, const SimpleTensor<T> &pointwise_weights, - const SimpleTensor<T> &biases, const TensorShape &dst_shape, const PadStrideInfo &depthwise_conv_info, const PadStrideInfo &pointwise_conv_info) + const SimpleTensor<T> &pointwise_biases, const TensorShape &dst_shape, const PadStrideInfo &depthwise_conv_info, const PadStrideInfo &pointwise_conv_info) { // Compute reference - SimpleTensor<T> depthwise_out = depthwise_convolution(src, depthwise_weights, depthwise_out_shape, depthwise_conv_info); - SimpleTensor<T> dst = convolution_layer(depthwise_out, pointwise_weights, biases, dst_shape, pointwise_conv_info); + SimpleTensor<T> depthwise_out = depthwise_convolution(src, depthwise_weights, depthwise_biases, depthwise_out_shape, depthwise_conv_info); + SimpleTensor<T> dst = convolution_layer(depthwise_out, pointwise_weights, pointwise_biases, dst_shape, pointwise_conv_info); return dst; } -template SimpleTensor<float> depthwise_separable_convolution_layer(const SimpleTensor<float> &in, const SimpleTensor<float> &depthwise_weights, const TensorShape &depthwise_out_shape, - const SimpleTensor<float> &pointwise_weights, const SimpleTensor<float> &biases, const TensorShape &dst_shape, const PadStrideInfo &depthwise_conv_info, const PadStrideInfo &pointwise_conv_info); +template SimpleTensor<float> depthwise_separable_convolution_layer(const SimpleTensor<float> &in, const SimpleTensor<float> &depthwise_weights, const SimpleTensor<float> &depthwise_biases, + const TensorShape &depthwise_out_shape, + const SimpleTensor<float> &pointwise_weights, const SimpleTensor<float> &pointwise_biases, const TensorShape &dst_shape, const PadStrideInfo &depthwise_conv_info, + const PadStrideInfo &pointwise_conv_info); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/CPP/DepthwiseSeparableConvolutionLayer.h b/tests/validation/CPP/DepthwiseSeparableConvolutionLayer.h index 71cd013424..0fcce2c964 100644 --- a/tests/validation/CPP/DepthwiseSeparableConvolutionLayer.h +++ b/tests/validation/CPP/DepthwiseSeparableConvolutionLayer.h @@ -36,9 +36,10 @@ namespace validation namespace reference { template <typename T> -SimpleTensor<T> depthwise_separable_convolution_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &depthwise_weights, const TensorShape &depthwise_out_shape, - const SimpleTensor<T> &pointwise_weights, - const SimpleTensor<T> &biases, const TensorShape &dst_shape, const PadStrideInfo &depthwise_conv_info, const PadStrideInfo &pointwise_conv_info); +SimpleTensor<T> depthwise_separable_convolution_layer(const SimpleTensor<T> &src, const SimpleTensor<T> &depthwise_weights, const SimpleTensor<T> &depthwise_biases, + const TensorShape &depthwise_out_shape, + const SimpleTensor<T> &pointwise_weights, const SimpleTensor<T> &pointwise_biases, const TensorShape &dst_shape, + const PadStrideInfo &depthwise_conv_info, const PadStrideInfo &pointwise_conv_info); } // namespace reference } // namespace validation } // namespace test diff --git a/tests/validation/fixtures/DepthwiseConvolutionFixture.h b/tests/validation/fixtures/DepthwiseConvolutionFixture.h index 4a890f6333..d883807537 100644 --- a/tests/validation/fixtures/DepthwiseConvolutionFixture.h +++ b/tests/validation/fixtures/DepthwiseConvolutionFixture.h @@ -47,10 +47,10 @@ class DepthwiseConvolutionValidationFixture : public framework::Fixture { public: template <typename...> - void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape out_shape, PadStrideInfo pad_stride_info) + void setup(TensorShape in_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape out_shape, PadStrideInfo pad_stride_info) { - _target = compute_target(in_shape, weights_shape, out_shape, pad_stride_info); - _reference = compute_reference(in_shape, weights_shape, out_shape, pad_stride_info); + _target = compute_target(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info); + _reference = compute_reference(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info); } protected: @@ -70,29 +70,33 @@ protected: } } - TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &output_shape, PadStrideInfo &pad_stride_info) + TensorType compute_target(const TensorShape &input_shape, const TensorShape &weights_shape, const TensorShape &biases_shape, const TensorShape &output_shape, PadStrideInfo &pad_stride_info) { // Create tensors TensorType src = create_tensor<TensorType>(input_shape, DataType::F32); TensorType weights = create_tensor<TensorType>(weights_shape, DataType::F32); + TensorType biases = create_tensor<TensorType>(biases_shape, DataType::F32); TensorType dst = create_tensor<TensorType>(output_shape, DataType::F32); // Create Depthwise Convolution configure function FunctionType depthwise_convolution; - depthwise_convolution.configure(&src, &dst, &weights, pad_stride_info); + depthwise_convolution.configure(&src, &dst, &weights, &biases, pad_stride_info); // Allocate tensors src.allocator()->allocate(); weights.allocator()->allocate(); + biases.allocator()->allocate(); dst.allocator()->allocate(); ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(!weights.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(!biases.info()->is_resizable(), framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); // Fill tensors fill(AccessorType(src), 0); fill(AccessorType(weights), 1); + fill(AccessorType(biases), 2); // Compute function depthwise_convolution.run(); @@ -100,15 +104,17 @@ protected: return dst; } - SimpleTensor<T> compute_reference(const TensorShape &in_shape, const TensorShape &weights_shape, const TensorShape &out_shape, const PadStrideInfo &pad_stride_info) + SimpleTensor<T> compute_reference(const TensorShape &in_shape, const TensorShape &weights_shape, const TensorShape &biases_shape, const TensorShape &out_shape, const PadStrideInfo &pad_stride_info) { SimpleTensor<T> src(in_shape, DataType::F32); SimpleTensor<T> weights(weights_shape, DataType::F32); + SimpleTensor<T> biases(biases_shape, DataType::F32); fill(src, 0); fill(weights, 1); + fill(biases, 2); - return reference::depthwise_convolution(src, weights, out_shape, pad_stride_info); + return reference::depthwise_convolution(src, weights, biases, out_shape, pad_stride_info); } TensorType _target{}; diff --git a/tests/validation/fixtures/DepthwiseSeparableConvolutionLayerFixture.h b/tests/validation/fixtures/DepthwiseSeparableConvolutionLayerFixture.h index e8f6854b49..112bd19848 100644 --- a/tests/validation/fixtures/DepthwiseSeparableConvolutionLayerFixture.h +++ b/tests/validation/fixtures/DepthwiseSeparableConvolutionLayerFixture.h @@ -47,22 +47,25 @@ class DepthwiseSeparableConvolutionValidationFixture : public framework::Fixture { public: template <typename...> - void setup(TensorShape in_shape, TensorShape depthwise_weights_shape, TensorShape depthwise_out_shape, TensorShape pointwise_weights_shape, TensorShape biases_shape, TensorShape output_shape, + void setup(TensorShape in_shape, TensorShape depthwise_weights_shape, TensorShape depthwise_biases_shape, TensorShape depthwise_out_shape, TensorShape pointwise_weights_shape, + TensorShape pointwise_biases_shape, TensorShape output_shape, PadStrideInfo pad_stride_depthwise_info, PadStrideInfo pad_stride_pointwise_info) { - _target = compute_target(in_shape, depthwise_weights_shape, depthwise_out_shape, pointwise_weights_shape, biases_shape, output_shape, pad_stride_depthwise_info, pad_stride_pointwise_info); - _reference = compute_reference(in_shape, depthwise_weights_shape, depthwise_out_shape, pointwise_weights_shape, biases_shape, output_shape, pad_stride_depthwise_info, pad_stride_pointwise_info); + _target = compute_target(in_shape, depthwise_weights_shape, depthwise_biases_shape, depthwise_out_shape, pointwise_weights_shape, pointwise_biases_shape, output_shape, pad_stride_depthwise_info, + pad_stride_pointwise_info); + _reference = compute_reference(in_shape, depthwise_weights_shape, depthwise_biases_shape, depthwise_out_shape, pointwise_weights_shape, pointwise_biases_shape, output_shape, pad_stride_depthwise_info, + pad_stride_pointwise_info); } protected: template <typename U> - void fill(U &&tensor, int i) + void fill(U &&tensor, int i, bool zero_fill = false) { switch(tensor.data_type()) { case DataType::F32: { - std::uniform_real_distribution<> distribution(-1.0f, 1.0f); + std::uniform_real_distribution<> distribution((zero_fill) ? 0.f : -1.0f, (zero_fill) ? 0.f : 1.0f); library->fill(tensor, distribution, i); break; } @@ -71,42 +74,47 @@ protected: } } - TensorType compute_target(const TensorShape &input_shape, const TensorShape &depthwise_weights_shape, const TensorShape &depthwise_out_shape, const TensorShape &pointwise_weights_shape, - const TensorShape &biases_shape, - const TensorShape &output_shape, const PadStrideInfo &pad_stride_depthwise_info, const PadStrideInfo &pad_stride_pointwise_info) + TensorType compute_target(const TensorShape &input_shape, const TensorShape &depthwise_weights_shape, const TensorShape &depthwise_biases_shape, const TensorShape &depthwise_out_shape, + const TensorShape &pointwise_weights_shape, const TensorShape &pointwise_biases_shape, const TensorShape &output_shape, + const PadStrideInfo &pad_stride_depthwise_info, const PadStrideInfo &pad_stride_pointwise_info) { // Create tensors TensorType src = create_tensor<TensorType>(input_shape, DataType::F32); TensorType depthwise_weights = create_tensor<TensorType>(depthwise_weights_shape, DataType::F32); + TensorType depthwise_biases = create_tensor<TensorType>(depthwise_biases_shape, DataType::F32); TensorType depthwise_out = create_tensor<TensorType>(depthwise_out_shape, DataType::F32); TensorType pointwise_weights = create_tensor<TensorType>(pointwise_weights_shape, DataType::F32); - TensorType biases = create_tensor<TensorType>(biases_shape, DataType::F32); + TensorType pointwise_biases = create_tensor<TensorType>(pointwise_biases_shape, DataType::F32); TensorType dst = create_tensor<TensorType>(output_shape, DataType::F32); // Create Depthwise Separable Convolution Layer configure function CLDepthwiseSeparableConvolutionLayer depthwise_separable_convolution_layer; - depthwise_separable_convolution_layer.configure(&src, &depthwise_weights, &depthwise_out, &pointwise_weights, &biases, &dst, pad_stride_depthwise_info, pad_stride_pointwise_info); + depthwise_separable_convolution_layer.configure(&src, &depthwise_weights, &depthwise_biases, &depthwise_out, &pointwise_weights, &pointwise_biases, &dst, pad_stride_depthwise_info, + pad_stride_pointwise_info); // Allocate tensors src.allocator()->allocate(); depthwise_weights.allocator()->allocate(); + depthwise_biases.allocator()->allocate(); depthwise_out.allocator()->allocate(); pointwise_weights.allocator()->allocate(); - biases.allocator()->allocate(); + pointwise_biases.allocator()->allocate(); dst.allocator()->allocate(); ARM_COMPUTE_EXPECT(!src.info()->is_resizable(), framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(!depthwise_weights.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(!depthwise_biases.info()->is_resizable(), framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(!depthwise_out.info()->is_resizable(), framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(!pointwise_weights.info()->is_resizable(), framework::LogLevel::ERRORS); - ARM_COMPUTE_EXPECT(!biases.info()->is_resizable(), framework::LogLevel::ERRORS); + ARM_COMPUTE_EXPECT(!pointwise_biases.info()->is_resizable(), framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(!dst.info()->is_resizable(), framework::LogLevel::ERRORS); // Fill tensors fill(AccessorType(src), 0); fill(AccessorType(depthwise_weights), 1); - fill(AccessorType(pointwise_weights), 2); - fill(AccessorType(biases), 3); + fill(AccessorType(depthwise_biases), 2, true); + fill(AccessorType(pointwise_weights), 3); + fill(AccessorType(pointwise_biases), 4); // Compute function depthwise_separable_convolution_layer.run(); @@ -114,20 +122,27 @@ protected: return dst; } - SimpleTensor<T> compute_reference(const TensorShape &in_shape, const TensorShape &depthwise_weights_shape, const TensorShape &depthwise_out_shape, const TensorShape &pointwise_weights_shape, - const TensorShape &biases_shape, const TensorShape &dst_shape, const PadStrideInfo &pad_stride_depthwise_info, const PadStrideInfo &pad_stride_pointwise_info) + SimpleTensor<T> compute_reference(const TensorShape &in_shape, const TensorShape &depthwise_weights_shape, const TensorShape &depthwise_biases_shape, const TensorShape &depthwise_out_shape, + const TensorShape &pointwise_weights_shape, const TensorShape &pointwise_biases_shape, const TensorShape &dst_shape, + const PadStrideInfo &pad_stride_depthwise_info, const PadStrideInfo &pad_stride_pointwise_info) { SimpleTensor<T> src(in_shape, DataType::F32); SimpleTensor<T> depthwise_weights(depthwise_weights_shape, DataType::F32); + SimpleTensor<T> depthwise_biases(depthwise_biases_shape, DataType::F32); SimpleTensor<T> pointwise_weights(pointwise_weights_shape, DataType::F32); - SimpleTensor<T> biases(biases_shape, DataType::F32); + SimpleTensor<T> pointwise_biases(pointwise_biases_shape, DataType::F32); fill(src, 0); fill(depthwise_weights, 1); - fill(pointwise_weights, 2); - fill(biases, 3); + fill(depthwise_biases, 2, true); + fill(pointwise_weights, 3); + fill(pointwise_biases, 4); - return reference::depthwise_separable_convolution_layer(src, depthwise_weights, depthwise_out_shape, pointwise_weights, biases, dst_shape, pad_stride_depthwise_info, pad_stride_pointwise_info); + return reference::depthwise_separable_convolution_layer(src, + depthwise_weights, depthwise_biases, depthwise_out_shape, + pointwise_weights, pointwise_biases, + dst_shape, + pad_stride_depthwise_info, pad_stride_pointwise_info); } TensorType _target{}; |