From a788c2f7b143731704cdbc6a7f0016e4f38896d9 Mon Sep 17 00:00:00 2001 From: Manuel Bottini Date: Mon, 8 Apr 2019 13:18:00 +0100 Subject: COMPMID-2108: Fuse Activation Layer in CLDepthwiseConvolutionLayer3x3Kernels for F32 Change-Id: I39dd23696b6d8573e172a59b9e327b6a69886f08 Signed-off-by: Manuel Bottini Reviewed-on: https://review.mlplatform.org/c/973 Comments-Addressed: Arm Jenkins Tested-by: Arm Jenkins Reviewed-by: Usama Arif Reviewed-by: Giuseppe Rossini --- src/core/CL/cl_kernels/depthwise_convolution.cl | 512 ++++++++++++--------- .../CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp | 22 +- .../CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp | 18 +- tests/SimpleTensor.h | 8 +- tests/validation/CL/DepthwiseConvolutionLayer.cpp | 276 ++++++----- .../GLES_COMPUTE/DepthwiseConvolutionLayer.cpp | 18 +- .../validation/NEON/DepthwiseConvolutionLayer.cpp | 246 +++++----- .../fixtures/DepthwiseConvolutionLayerFixture.h | 31 +- 8 files changed, 649 insertions(+), 482 deletions(-) diff --git a/src/core/CL/cl_kernels/depthwise_convolution.cl b/src/core/CL/cl_kernels/depthwise_convolution.cl index 8ee0185fe6..a8611af98e 100644 --- a/src/core/CL/cl_kernels/depthwise_convolution.cl +++ b/src/core/CL/cl_kernels/depthwise_convolution.cl @@ -24,7 +24,141 @@ #include "helpers.h" -#if defined(DEPTH_MULTIPLIER) && defined(DST_CHANNELS) +#if defined(FUSED_ACTIVATION) +#define TYPE VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) +#define SELECT_TYPE VEC_DATA_TYPE(SELECT_DATA_TYPE, VEC_SIZE) +#include "activation_helpers.h" +#define ACTIVATION_FUNC(x) ACTIVATION_OP(FUSED_ACTIVATION, x) +#else /* defined(FUSED_ACTIVATION) */ +#define ACTIVATION_FUNC(x) (x) +#endif /* defined(FUSED_ACTIVATION) */ + +/** Get the pointer position at a certain offset in x and y direction. + * + * @param[in] ptr Pointer to the starting position of the buffer + * @param[in] x Relative X position + * @param[in] y Relative Y position + * @param[in] stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] stride_y Stride of the source tensor in Y dimension (in bytes) + * + * @return a uchar + */ +inline __global uchar *ptr_offset(__global uchar *ptr, const int x, const int y, const int stride_x, const int stride_y) +{ + return ptr + x * stride_x + y * stride_y; +} + +#if(DILATION_X == 1 && DILATION_Y == 1) + +#define CONVOLUTION1x3_BIFROST2X1_STRIDE1(acc, src0, weights_row0) \ + ({ \ + acc.s0 = fma(src0.s0, weights_row0.s0, acc.s0); \ + acc.s0 = fma(src0.s1, weights_row0.s1, acc.s0); \ + acc.s0 = fma(src0.s2, weights_row0.s2, acc.s0); \ + acc.s1 = fma(src0.s1, weights_row0.s0, acc.s1); \ + acc.s1 = fma(src0.s2, weights_row0.s1, acc.s1); \ + acc.s1 = fma(src0.s3, weights_row0.s2, acc.s1); \ + }) + +#define CONVOLUTION1x3_BIFROST4X1_STRIDE1(acc, src0, weights_row0) \ + ({ \ + acc.s0 = fma(src0.s0, weights_row0.s0, acc.s0); \ + acc.s0 = fma(src0.s1, weights_row0.s1, acc.s0); \ + acc.s0 = fma(src0.s2, weights_row0.s2, acc.s0); \ + acc.s1 = fma(src0.s1, weights_row0.s0, acc.s1); \ + acc.s1 = fma(src0.s2, weights_row0.s1, acc.s1); \ + acc.s1 = fma(src0.s3, weights_row0.s2, acc.s1); \ + acc.s2 = fma(src0.s2, weights_row0.s0, acc.s2); \ + acc.s2 = fma(src0.s3, weights_row0.s1, acc.s2); \ + acc.s2 = fma(src0.s4, weights_row0.s2, acc.s2); \ + acc.s3 = fma(src0.s3, weights_row0.s0, acc.s3); \ + acc.s3 = fma(src0.s4, weights_row0.s1, acc.s3); \ + acc.s3 = fma(src0.s5, weights_row0.s2, acc.s3); \ + }) + +#define CONVOLUTION1x3_BIFROST2X1_STRIDE2(acc, src0, src1, weights_row0) \ + ({ \ + acc.s0 = fma(src0.s0, weights_row0.s0, acc.s0); \ + acc.s0 = fma(src0.s1, weights_row0.s1, acc.s0); \ + acc.s0 = fma(src0.s2, weights_row0.s2, acc.s0); \ + acc.s1 = fma(src0.s2, weights_row0.s0, acc.s1); \ + acc.s1 = fma(src0.s3, weights_row0.s1, acc.s1); \ + acc.s1 = fma(src1.s0, weights_row0.s2, acc.s1); \ + }) + +#define CONVOLUTION1x3_BIFROST4X1_STRIDE2(acc, src0, src1, weights_row0) \ + ({ \ + acc.s0 = fma(src0.s0, weights_row0.s0, acc.s0); \ + acc.s0 = fma(src0.s1, weights_row0.s1, acc.s0); \ + acc.s0 = fma(src0.s2, weights_row0.s2, acc.s0); \ + acc.s1 = fma(src0.s2, weights_row0.s0, acc.s1); \ + acc.s1 = fma(src0.s3, weights_row0.s1, acc.s1); \ + acc.s1 = fma(src0.s4, weights_row0.s2, acc.s1); \ + acc.s2 = fma(src0.s4, weights_row0.s0, acc.s2); \ + acc.s2 = fma(src0.s5, weights_row0.s1, acc.s2); \ + acc.s2 = fma(src0.s6, weights_row0.s2, acc.s2); \ + acc.s3 = fma(src0.s6, weights_row0.s0, acc.s3); \ + acc.s3 = fma(src0.s7, weights_row0.s1, acc.s3); \ + acc.s3 = fma(src1.s0, weights_row0.s2, acc.s3); \ + }) + +#else /* DILATION_X==1 && DILATION_Y==1 */ + +#define CONVOLUTION1x3_BIFROST2X1_STRIDE1(acc, src0_left, src0_mid, src0_right, weights_row0) \ + ({ \ + acc.s0 = fma(src0_left.s0, weights_row0.s0, acc.s0); \ + acc.s0 = fma(src0_mid.s0, weights_row0.s1, acc.s0); \ + acc.s0 = fma(src0_right.s0, weights_row0.s2, acc.s0); \ + acc.s1 = fma(src0_left.s1, weights_row0.s0, acc.s1); \ + acc.s1 = fma(src0_mid.s1, weights_row0.s1, acc.s1); \ + acc.s1 = fma(src0_right.s1, weights_row0.s2, acc.s1); \ + }) + +#define CONVOLUTION1x3_BIFROST2X1_STRIDE2(acc, src0_left, src0_mid, src0_right, weights_row0) \ + ({ \ + acc.s0 = fma(src0_left.s0, weights_row0.s0, acc.s0); \ + acc.s0 = fma(src0_mid.s0, weights_row0.s1, acc.s0); \ + acc.s0 = fma(src0_right.s0, weights_row0.s2, acc.s0); \ + acc.s1 = fma(src0_left.s2, weights_row0.s0, acc.s1); \ + acc.s1 = fma(src0_mid.s2, weights_row0.s1, acc.s1); \ + acc.s1 = fma(src0_right.s2, weights_row0.s2, acc.s1); \ + }) + +#define CONVOLUTION1x3_BIFROST4X1_STRIDE1(acc, src0_left, src0_mid, src0_right, weights_row0) \ + ({ \ + acc.s0 = fma(src0_left.s0, weights_row0.s0, acc.s0); \ + acc.s0 = fma(src0_mid.s0, weights_row0.s1, acc.s0); \ + acc.s0 = fma(src0_right.s0, weights_row0.s2, acc.s0); \ + acc.s1 = fma(src0_left.s1, weights_row0.s0, acc.s1); \ + acc.s1 = fma(src0_mid.s1, weights_row0.s1, acc.s1); \ + acc.s1 = fma(src0_right.s1, weights_row0.s2, acc.s1); \ + acc.s2 = fma(src0_left.s2, weights_row0.s0, acc.s2); \ + acc.s2 = fma(src0_mid.s2, weights_row0.s1, acc.s2); \ + acc.s2 = fma(src0_right.s2, weights_row0.s2, acc.s2); \ + acc.s3 = fma(src0_left.s3, weights_row0.s0, acc.s3); \ + acc.s3 = fma(src0_mid.s3, weights_row0.s1, acc.s3); \ + acc.s3 = fma(src0_right.s3, weights_row0.s2, acc.s3); \ + }) + +#define CONVOLUTION1x3_BIFROST4X1_STRIDE2(acc, src0_left, src0_mid, src0_right, weights_row0) \ + ({ \ + acc.s0 = fma(src0_left.s0, weights_row0.s0, acc.s0); \ + acc.s0 = fma(src0_mid.s0, weights_row0.s1, acc.s0); \ + acc.s0 = fma(src0_right.s0, weights_row0.s2, acc.s0); \ + acc.s1 = fma(src0_left.s2, weights_row0.s0, acc.s1); \ + acc.s1 = fma(src0_mid.s2, weights_row0.s1, acc.s1); \ + acc.s1 = fma(src0_right.s2, weights_row0.s2, acc.s1); \ + acc.s2 = fma(src0_left.s4, weights_row0.s0, acc.s2); \ + acc.s2 = fma(src0_mid.s4, weights_row0.s1, acc.s2); \ + acc.s2 = fma(src0_right.s4, weights_row0.s2, acc.s2); \ + acc.s3 = fma(src0_left.s6, weights_row0.s0, acc.s3); \ + acc.s3 = fma(src0_mid.s6, weights_row0.s1, acc.s3); \ + acc.s3 = fma(src0_right.s6, weights_row0.s2, acc.s3); \ + }) + +#endif /* DILATION_X==1 && DILATION_Y==1 */ + +#if defined(DEPTH_MULTIPLIER) && defined(DST_CHANNELS) && defined(IS_F32) #if defined(CONV_STRIDE_X) #if CONV_STRIDE_X == 1 @@ -234,132 +368,13 @@ __kernel void depthwise_convolution_3x3( pixels += (float2)(*((__global float *)(biases.ptr + channel * biases_stride_x))); #endif //defined(HAS_BIAS) - vstore2(pixels, 0, (__global float *)dst.ptr); + vstore2(ACTIVATION_FUNC(pixels), 0, (__global float *)dst.ptr); } #endif //defined(CONV_STRIDE_X) -#if(DILATION_X == 1 && DILATION_Y == 1) - -#define CONVOLUTION1x3_BIFROST2X1_STRIDE1(acc, src0, weights_row0) \ - ({ \ - acc.s0 = fma(src0.s0, weights_row0.s0, acc.s0); \ - acc.s0 = fma(src0.s1, weights_row0.s1, acc.s0); \ - acc.s0 = fma(src0.s2, weights_row0.s2, acc.s0); \ - acc.s1 = fma(src0.s1, weights_row0.s0, acc.s1); \ - acc.s1 = fma(src0.s2, weights_row0.s1, acc.s1); \ - acc.s1 = fma(src0.s3, weights_row0.s2, acc.s1); \ - }) - -#define CONVOLUTION1x3_BIFROST4X1_STRIDE1(acc, src0, weights_row0) \ - ({ \ - acc.s0 = fma(src0.s0, weights_row0.s0, acc.s0); \ - acc.s0 = fma(src0.s1, weights_row0.s1, acc.s0); \ - acc.s0 = fma(src0.s2, weights_row0.s2, acc.s0); \ - acc.s1 = fma(src0.s1, weights_row0.s0, acc.s1); \ - acc.s1 = fma(src0.s2, weights_row0.s1, acc.s1); \ - acc.s1 = fma(src0.s3, weights_row0.s2, acc.s1); \ - acc.s2 = fma(src0.s2, weights_row0.s0, acc.s2); \ - acc.s2 = fma(src0.s3, weights_row0.s1, acc.s2); \ - acc.s2 = fma(src0.s4, weights_row0.s2, acc.s2); \ - acc.s3 = fma(src0.s3, weights_row0.s0, acc.s3); \ - acc.s3 = fma(src0.s4, weights_row0.s1, acc.s3); \ - acc.s3 = fma(src0.s5, weights_row0.s2, acc.s3); \ - }) - -#define CONVOLUTION1x3_BIFROST2X1_STRIDE2(acc, src0, src1, weights_row0) \ - ({ \ - acc.s0 = fma(src0.s0, weights_row0.s0, acc.s0); \ - acc.s0 = fma(src0.s1, weights_row0.s1, acc.s0); \ - acc.s0 = fma(src0.s2, weights_row0.s2, acc.s0); \ - acc.s1 = fma(src0.s2, weights_row0.s0, acc.s1); \ - acc.s1 = fma(src0.s3, weights_row0.s1, acc.s1); \ - acc.s1 = fma(src1.s0, weights_row0.s2, acc.s1); \ - }) - -#define CONVOLUTION1x3_BIFROST4X1_STRIDE2(acc, src0, src1, weights_row0) \ - ({ \ - acc.s0 = fma(src0.s0, weights_row0.s0, acc.s0); \ - acc.s0 = fma(src0.s1, weights_row0.s1, acc.s0); \ - acc.s0 = fma(src0.s2, weights_row0.s2, acc.s0); \ - acc.s1 = fma(src0.s2, weights_row0.s0, acc.s1); \ - acc.s1 = fma(src0.s3, weights_row0.s1, acc.s1); \ - acc.s1 = fma(src0.s4, weights_row0.s2, acc.s1); \ - acc.s2 = fma(src0.s4, weights_row0.s0, acc.s2); \ - acc.s2 = fma(src0.s5, weights_row0.s1, acc.s2); \ - acc.s2 = fma(src0.s6, weights_row0.s2, acc.s2); \ - acc.s3 = fma(src0.s6, weights_row0.s0, acc.s3); \ - acc.s3 = fma(src0.s7, weights_row0.s1, acc.s3); \ - acc.s3 = fma(src1.s0, weights_row0.s2, acc.s3); \ - }) +#if(DILATION_X > 1 || DILATION_Y > 1) -#else /* DILATION_X==1 && DILATION_Y==1 */ - -#define CONVOLUTION1x3_BIFROST2X1_STRIDE1(acc, src0_left, src0_mid, src0_right, weights_row0) \ - ({ \ - acc.s0 = fma(src0_left.s0, weights_row0.s0, acc.s0); \ - acc.s0 = fma(src0_mid.s0, weights_row0.s1, acc.s0); \ - acc.s0 = fma(src0_right.s0, weights_row0.s2, acc.s0); \ - acc.s1 = fma(src0_left.s1, weights_row0.s0, acc.s1); \ - acc.s1 = fma(src0_mid.s1, weights_row0.s1, acc.s1); \ - acc.s1 = fma(src0_right.s1, weights_row0.s2, acc.s1); \ - }) - -#define CONVOLUTION1x3_BIFROST2X1_STRIDE2(acc, src0_left, src0_mid, src0_right, weights_row0) \ - ({ \ - acc.s0 = fma(src0_left.s0, weights_row0.s0, acc.s0); \ - acc.s0 = fma(src0_mid.s0, weights_row0.s1, acc.s0); \ - acc.s0 = fma(src0_right.s0, weights_row0.s2, acc.s0); \ - acc.s1 = fma(src0_left.s2, weights_row0.s0, acc.s1); \ - acc.s1 = fma(src0_mid.s2, weights_row0.s1, acc.s1); \ - acc.s1 = fma(src0_right.s2, weights_row0.s2, acc.s1); \ - }) - -#define CONVOLUTION1x3_BIFROST4X1_STRIDE1(acc, src0_left, src0_mid, src0_right, weights_row0) \ - ({ \ - acc.s0 = fma(src0_left.s0, weights_row0.s0, acc.s0); \ - acc.s0 = fma(src0_mid.s0, weights_row0.s1, acc.s0); \ - acc.s0 = fma(src0_right.s0, weights_row0.s2, acc.s0); \ - acc.s1 = fma(src0_left.s1, weights_row0.s0, acc.s1); \ - acc.s1 = fma(src0_mid.s1, weights_row0.s1, acc.s1); \ - acc.s1 = fma(src0_right.s1, weights_row0.s2, acc.s1); \ - acc.s2 = fma(src0_left.s2, weights_row0.s0, acc.s2); \ - acc.s2 = fma(src0_mid.s2, weights_row0.s1, acc.s2); \ - acc.s2 = fma(src0_right.s2, weights_row0.s2, acc.s2); \ - acc.s3 = fma(src0_left.s3, weights_row0.s0, acc.s3); \ - acc.s3 = fma(src0_mid.s3, weights_row0.s1, acc.s3); \ - acc.s3 = fma(src0_right.s3, weights_row0.s2, acc.s3); \ - }) - -#define CONVOLUTION1x3_BIFROST4X1_STRIDE2(acc, src0_left, src0_mid, src0_right, weights_row0) \ - ({ \ - acc.s0 = fma(src0_left.s0, weights_row0.s0, acc.s0); \ - acc.s0 = fma(src0_mid.s0, weights_row0.s1, acc.s0); \ - acc.s0 = fma(src0_right.s0, weights_row0.s2, acc.s0); \ - acc.s1 = fma(src0_left.s2, weights_row0.s0, acc.s1); \ - acc.s1 = fma(src0_mid.s2, weights_row0.s1, acc.s1); \ - acc.s1 = fma(src0_right.s2, weights_row0.s2, acc.s1); \ - acc.s2 = fma(src0_left.s4, weights_row0.s0, acc.s2); \ - acc.s2 = fma(src0_mid.s4, weights_row0.s1, acc.s2); \ - acc.s2 = fma(src0_right.s4, weights_row0.s2, acc.s2); \ - acc.s3 = fma(src0_left.s6, weights_row0.s0, acc.s3); \ - acc.s3 = fma(src0_mid.s6, weights_row0.s1, acc.s3); \ - acc.s3 = fma(src0_right.s6, weights_row0.s2, acc.s3); \ - }) - -/** Get the pointer position at a certain offset in x and y direction. - * - * @param[in] ptr Pointer to the starting position of the buffer - * @param[in] x Relative X position - * @param[in] y Relative Y position - * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) - * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) - */ -inline __global uchar *ptr_offset(__global uchar *ptr, const int x, const int y, const int stride_x, const int stride_y) -{ - return ptr + x * stride_x + y * stride_y; -} - -/** Perform 3x3 convolution for stride_x=1 and stride_y=1 when DILATION_X>1 and DILATION_Y>1 for F32 +/** Perform 3x3 convolution for stride_x=1 and stride_y=1 when DILATION_X>1 or DILATION_Y>1 for F32 * * @param[in] src_addr Pointer to the starting position of where to perform the convolution * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) @@ -397,7 +412,7 @@ inline float2 convolution_3x3_dilation_stridex1_stridey1_bifrost_f32(__global uc return pixels0; } -/** Perform 3x3 convolution for stride_x=2 and stride_y=2 when DILATION_X>1 and DILATION_Y>1 for F32 +/** Perform 3x3 convolution for stride_x=2 and stride_y=2 when DILATION_X>1 or DILATION_Y>1 for F32 * * @param[in] src_addr Pointer to the starting position of where to perform the convolution * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) @@ -435,87 +450,17 @@ inline float2 convolution_3x3_dilation_stridex2_stridey2_bifrost_f32(__global uc return pixels0; } -/** Perform 3x3 convolution for stride_x=1 and stride_y=1 when DILATION_X>1 and DILATION_Y>1 for f16 - * - * @param[in] src_addr Pointer to the starting position of where to perform the convolution - * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) - * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) - * @param[in] y_offset Offset from the source tensor from which to start convolution - * @param[in] weights_addr Pointer from where to get weights - * @param[in] weights_stride_y Stride of weights tesnsor in Y dimension - */ -inline half4 convolution_3x3_dilation_stridex1_stridey1_bifrost_f16(__global uchar *src_addr, const int stride_x_bytes, const int stride_y_bytes, - const int y_offset, __global uchar *weights_addr, const int weights_stride_y) -{ - // Load the weights - half3 weights_row0 = vload3(0, (__global half *)(weights_addr + 0 * weights_stride_y)); - half3 weights_row1 = vload3(0, (__global half *)(weights_addr + 1 * weights_stride_y)); - half3 weights_row2 = vload3(0, (__global half *)(weights_addr + 2 * weights_stride_y)); - - half4 pixels0 = 0.0f; - - half4 src00_left = vload4(0, (__global half *)ptr_offset(src_addr, 0, y_offset, stride_x_bytes, stride_y_bytes)); // Row0 - half4 src00_mid = vload4(0, (__global half *)ptr_offset(src_addr, DILATION_X, y_offset, stride_x_bytes, stride_y_bytes)); - half4 src00_right = vload4(0, (__global half *)ptr_offset(src_addr, 2 * DILATION_X, y_offset, stride_x_bytes, stride_y_bytes)); - - half4 src10_left = vload4(0, (__global half *)ptr_offset(src_addr, 0, y_offset + DILATION_Y, stride_x_bytes, stride_y_bytes)); // Row1 - half4 src10_mid = vload4(0, (__global half *)ptr_offset(src_addr, DILATION_X, y_offset + DILATION_Y, stride_x_bytes, stride_y_bytes)); - half4 src10_right = vload4(0, (__global half *)ptr_offset(src_addr, 2 * DILATION_X, y_offset + DILATION_Y, stride_x_bytes, stride_y_bytes)); - - half4 src20_left = vload4(0, (__global half *)ptr_offset(src_addr, 0, y_offset + DILATION_Y * 2, stride_x_bytes, stride_y_bytes)); // Row2 - half4 src20_mid = vload4(0, (__global half *)ptr_offset(src_addr, DILATION_X, y_offset + DILATION_Y * 2, stride_x_bytes, stride_y_bytes)); - half4 src20_right = vload4(0, (__global half *)ptr_offset(src_addr, 2 * DILATION_X, y_offset + DILATION_Y * 2, stride_x_bytes, stride_y_bytes)); - - CONVOLUTION1x3_BIFROST4X1_STRIDE1(pixels0, src00_left, src00_mid, src00_right, weights_row0); - CONVOLUTION1x3_BIFROST4X1_STRIDE1(pixels0, src10_left, src10_mid, src10_right, weights_row1); - CONVOLUTION1x3_BIFROST4X1_STRIDE1(pixels0, src20_left, src20_mid, src20_right, weights_row2); - - return pixels0; -} - -/** Perform 3x3 convolution for stride_x=2 and stride_y=2 when DILATION_X>1 and DILATION_Y>1 for F16 - * - * @param[in] src_addr Pointer to the starting position of where to perform the convolution - * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) - * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) - * @param[in] y_offset Offset from the source tensor from which to start convolution - * @param[in] weights_addr Pointer from where to get weights - * @param[in] weights_stride_y Stride of weights tesnsor in Y dimension - */ -inline half4 convolution_3x3_dilation_stridex2_stridey2_bifrost_f16(__global uchar *src_addr, const int stride_x_bytes, const int stride_y_bytes, - const int y_offset, __global uchar *weights_addr, const int weights_stride_y) -{ - // Load the weights - half3 weights_row0 = vload3(0, (__global half *)(weights_addr + 0 * weights_stride_y)); - half3 weights_row1 = vload3(0, (__global half *)(weights_addr + 1 * weights_stride_y)); - half3 weights_row2 = vload3(0, (__global half *)(weights_addr + 2 * weights_stride_y)); - - half4 pixels0 = 0.0f; - - half8 src00_left = vload8(0, (__global half *)ptr_offset(src_addr, 0, y_offset, stride_x_bytes, stride_y_bytes)); // Row0 - half8 src00_mid = vload8(0, (__global half *)ptr_offset(src_addr, DILATION_X, y_offset, stride_x_bytes, stride_y_bytes)); - half8 src00_right = vload8(0, (__global half *)ptr_offset(src_addr, 2 * DILATION_X, y_offset, stride_x_bytes, stride_y_bytes)); - - half8 src10_left = vload8(0, (__global half *)ptr_offset(src_addr, 0, y_offset + DILATION_Y, stride_x_bytes, stride_y_bytes)); // Row1 - half8 src10_mid = vload8(0, (__global half *)ptr_offset(src_addr, DILATION_X, y_offset + DILATION_Y, stride_x_bytes, stride_y_bytes)); - half8 src10_right = vload8(0, (__global half *)ptr_offset(src_addr, 2 * DILATION_X, y_offset + DILATION_Y, stride_x_bytes, stride_y_bytes)); - - half8 src20_left = vload8(0, (__global half *)ptr_offset(src_addr, 0, y_offset + DILATION_Y * 2, stride_x_bytes, stride_y_bytes)); // Row2 - half8 src20_mid = vload8(0, (__global half *)ptr_offset(src_addr, DILATION_X, y_offset + DILATION_Y * 2, stride_x_bytes, stride_y_bytes)); - half8 src20_right = vload8(0, (__global half *)ptr_offset(src_addr, 2 * DILATION_X, y_offset + DILATION_Y * 2, stride_x_bytes, stride_y_bytes)); - - CONVOLUTION1x3_BIFROST4X1_STRIDE2(pixels0, src00_left, src00_mid, src00_right, weights_row0); - CONVOLUTION1x3_BIFROST4X1_STRIDE2(pixels0, src10_left, src10_mid, src10_right, weights_row1); - CONVOLUTION1x3_BIFROST4X1_STRIDE2(pixels0, src20_left, src20_mid, src20_right, weights_row2); - - return pixels0; -} - -#endif /* DILATION_X==1 && DILATION_Y==1 */ +#endif /* (DILATION_X > 1 || DILATION_Y > 1) */ /** This OpenCL kernel is optimized for Bifrost architectures and computes the depthwise convolution 3x3 when both * stride_x and stride_y are equal to 1 * + * @note It is possible to select the activation function to apply using -DFUSED_ACTIVATION e.g. -DFUSED_ACTIVATION=relu + * @note If activation function is enabled, the data type must be passed at compile time using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types: float. + * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively + * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size + * @note Select data type should be given too with -DSELECT_DATA_TYPE e.g -DSELECT_DATA_TYPE=float + * * @param[in] src_ptr Pointer to the source tensor. Supported data types: F32 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) @@ -622,15 +567,21 @@ __kernel void depthwise_convolution_3x3_stridex1_stridey1_bifrost_f32( pixels3 += (float2)bias; #endif /* defined(HAS_BIAS) */ - vstore2(pixels0, 0, (__global float *)(dst.ptr + 0 * dst_stride_y)); - vstore2(pixels1, 0, (__global float *)(dst.ptr + 1 * dst_stride_y)); - vstore2(pixels2, 0, (__global float *)(dst.ptr + 2 * dst_stride_y)); - vstore2(pixels3, 0, (__global float *)(dst.ptr + 3 * dst_stride_y)); + vstore2(ACTIVATION_FUNC(pixels0), 0, (__global float *)(dst.ptr + 0 * dst_stride_y)); + vstore2(ACTIVATION_FUNC(pixels1), 0, (__global float *)(dst.ptr + 1 * dst_stride_y)); + vstore2(ACTIVATION_FUNC(pixels2), 0, (__global float *)(dst.ptr + 2 * dst_stride_y)); + vstore2(ACTIVATION_FUNC(pixels3), 0, (__global float *)(dst.ptr + 3 * dst_stride_y)); } /** This OpenCL kernel is optimized for Bifrost architectures and computes the depthwise convolution 3x3 when both * stride_x and stride_y are equal to 2 * + * @note It is possible to select the activation function to apply using -DFUSED_ACTIVATION e.g. -DFUSED_ACTIVATION=relu + * @note If activation function is enabled, the data type must be passed at compile time using -DDATA_TYPE e.g. -DDATA_TYPE=float. Supported data types: float. + * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively + * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size + * @note Select data type should be given too with -DSELECT_DATA_TYPE e.g -DSELECT_DATA_TYPE=float + * * @param[in] src_ptr Pointer to the source tensor. Supported data types: F32 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) @@ -727,11 +678,11 @@ __kernel void depthwise_convolution_3x3_stridex2_stridey2_bifrost_f32( pixels1 += (float2)bias; #endif /* defined(HAS_BIAS) */ - vstore2(pixels0, 0, (__global float *)(dst.ptr + 0 * dst_stride_y)); - vstore2(pixels1, 0, (__global float *)(dst.ptr + 1 * dst_stride_y)); + vstore2(ACTIVATION_FUNC(pixels0), 0, (__global float *)(dst.ptr + 0 * dst_stride_y)); + vstore2(ACTIVATION_FUNC(pixels1), 0, (__global float *)(dst.ptr + 1 * dst_stride_y)); } -#endif // defined(DEPTH_MULTIPLIER) && defined(DST_CHANNELS) +#endif // defined(DEPTH_MULTIPLIER) && defined(DST_CHANNELS) && defined(IS_F32) #if defined(VEC_SIZE) && defined(DATA_TYPE) && defined(DST_WIDTH) /** Reshape the weights for quantized depthwise convolution @@ -998,7 +949,7 @@ __kernel void depthwise_vector_to_tensor( #endif //defined(CONV_WIDTH) && defined(CONV_HEIGHT) && defined(DATA_TYPE) -#if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(DEPTH_MULTIPLIER) && defined(DST_CHANNELS) +#if defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(DEPTH_MULTIPLIER) && defined(DST_CHANNELS) && defined(IS_F16) #if defined(CONV_STRIDE_X) #if CONV_STRIDE_X == 1 #define convolution1x3_f16 convolution1x3_stride_1_f16 @@ -1010,6 +961,86 @@ __kernel void depthwise_vector_to_tensor( #error "Stride not supported" #endif /* CONV_STRIDE_X */ +#if(DILATION_X > 1 || DILATION_Y > 1) + +/** Perform 3x3 convolution for stride_x=1 and stride_y=1 when DILATION_X>1 or DILATION_Y>1 for f16 + * + * @param[in] src_addr Pointer to the starting position of where to perform the convolution + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] y_offset Offset from the source tensor from which to start convolution + * @param[in] weights_addr Pointer from where to get weights + * @param[in] weights_stride_y Stride of weights tesnsor in Y dimension + */ +inline half4 convolution_3x3_dilation_stridex1_stridey1_bifrost_f16(__global uchar *src_addr, const int stride_x_bytes, const int stride_y_bytes, + const int y_offset, __global uchar *weights_addr, const int weights_stride_y) +{ + // Load the weights + half3 weights_row0 = vload3(0, (__global half *)(weights_addr + 0 * weights_stride_y)); + half3 weights_row1 = vload3(0, (__global half *)(weights_addr + 1 * weights_stride_y)); + half3 weights_row2 = vload3(0, (__global half *)(weights_addr + 2 * weights_stride_y)); + + half4 pixels0 = 0.0f; + + half4 src00_left = vload4(0, (__global half *)ptr_offset(src_addr, 0, y_offset, stride_x_bytes, stride_y_bytes)); // Row0 + half4 src00_mid = vload4(0, (__global half *)ptr_offset(src_addr, DILATION_X, y_offset, stride_x_bytes, stride_y_bytes)); + half4 src00_right = vload4(0, (__global half *)ptr_offset(src_addr, 2 * DILATION_X, y_offset, stride_x_bytes, stride_y_bytes)); + + half4 src10_left = vload4(0, (__global half *)ptr_offset(src_addr, 0, y_offset + DILATION_Y, stride_x_bytes, stride_y_bytes)); // Row1 + half4 src10_mid = vload4(0, (__global half *)ptr_offset(src_addr, DILATION_X, y_offset + DILATION_Y, stride_x_bytes, stride_y_bytes)); + half4 src10_right = vload4(0, (__global half *)ptr_offset(src_addr, 2 * DILATION_X, y_offset + DILATION_Y, stride_x_bytes, stride_y_bytes)); + + half4 src20_left = vload4(0, (__global half *)ptr_offset(src_addr, 0, y_offset + DILATION_Y * 2, stride_x_bytes, stride_y_bytes)); // Row2 + half4 src20_mid = vload4(0, (__global half *)ptr_offset(src_addr, DILATION_X, y_offset + DILATION_Y * 2, stride_x_bytes, stride_y_bytes)); + half4 src20_right = vload4(0, (__global half *)ptr_offset(src_addr, 2 * DILATION_X, y_offset + DILATION_Y * 2, stride_x_bytes, stride_y_bytes)); + + CONVOLUTION1x3_BIFROST4X1_STRIDE1(pixels0, src00_left, src00_mid, src00_right, weights_row0); + CONVOLUTION1x3_BIFROST4X1_STRIDE1(pixels0, src10_left, src10_mid, src10_right, weights_row1); + CONVOLUTION1x3_BIFROST4X1_STRIDE1(pixels0, src20_left, src20_mid, src20_right, weights_row2); + + return pixels0; +} + +/** Perform 3x3 convolution for stride_x=2 and stride_y=2 when DILATION_X>1 or DILATION_Y>1 for F16 + * + * @param[in] src_addr Pointer to the starting position of where to perform the convolution + * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) + * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) + * @param[in] y_offset Offset from the source tensor from which to start convolution + * @param[in] weights_addr Pointer from where to get weights + * @param[in] weights_stride_y Stride of weights tesnsor in Y dimension + */ +inline half4 convolution_3x3_dilation_stridex2_stridey2_bifrost_f16(__global uchar *src_addr, const int stride_x_bytes, const int stride_y_bytes, + const int y_offset, __global uchar *weights_addr, const int weights_stride_y) +{ + // Load the weights + half3 weights_row0 = vload3(0, (__global half *)(weights_addr + 0 * weights_stride_y)); + half3 weights_row1 = vload3(0, (__global half *)(weights_addr + 1 * weights_stride_y)); + half3 weights_row2 = vload3(0, (__global half *)(weights_addr + 2 * weights_stride_y)); + + half4 pixels0 = 0.0f; + + half8 src00_left = vload8(0, (__global half *)ptr_offset(src_addr, 0, y_offset, stride_x_bytes, stride_y_bytes)); // Row0 + half8 src00_mid = vload8(0, (__global half *)ptr_offset(src_addr, DILATION_X, y_offset, stride_x_bytes, stride_y_bytes)); + half8 src00_right = vload8(0, (__global half *)ptr_offset(src_addr, 2 * DILATION_X, y_offset, stride_x_bytes, stride_y_bytes)); + + half8 src10_left = vload8(0, (__global half *)ptr_offset(src_addr, 0, y_offset + DILATION_Y, stride_x_bytes, stride_y_bytes)); // Row1 + half8 src10_mid = vload8(0, (__global half *)ptr_offset(src_addr, DILATION_X, y_offset + DILATION_Y, stride_x_bytes, stride_y_bytes)); + half8 src10_right = vload8(0, (__global half *)ptr_offset(src_addr, 2 * DILATION_X, y_offset + DILATION_Y, stride_x_bytes, stride_y_bytes)); + + half8 src20_left = vload8(0, (__global half *)ptr_offset(src_addr, 0, y_offset + DILATION_Y * 2, stride_x_bytes, stride_y_bytes)); // Row2 + half8 src20_mid = vload8(0, (__global half *)ptr_offset(src_addr, DILATION_X, y_offset + DILATION_Y * 2, stride_x_bytes, stride_y_bytes)); + half8 src20_right = vload8(0, (__global half *)ptr_offset(src_addr, 2 * DILATION_X, y_offset + DILATION_Y * 2, stride_x_bytes, stride_y_bytes)); + + CONVOLUTION1x3_BIFROST4X1_STRIDE2(pixels0, src00_left, src00_mid, src00_right, weights_row0); + CONVOLUTION1x3_BIFROST4X1_STRIDE2(pixels0, src10_left, src10_mid, src10_right, weights_row1); + CONVOLUTION1x3_BIFROST4X1_STRIDE2(pixels0, src20_left, src20_mid, src20_right, weights_row2); + + return pixels0; +} + +#endif // (DILATION_X > 1 && DILATION_Y > 1) + /** Compute a 1D horizontal convolution of size 3 and stride 1 for 16bit floating point type. * * @param[in] left_pixel Pointer to the left pixel. @@ -1150,6 +1181,12 @@ inline half4 convolution3x3_f16( #if defined(DEPTH_MULTIPLIER) /** This OpenCL kernel computes the depthwise convolution 3x3 + * + * @note It is possible to select the activation function to apply using -DFUSED_ACTIVATION e.g. -DFUSED_ACTIVATION=relu + * @note If activation function is enabled, the data type must be passed at compile time using -DDATA_TYPE e.g. -DDATA_TYPE=half. Supported data types: half. + * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively + * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size + * @note Select data type should be given too with -DSELECT_DATA_TYPE e.g -DSELECT_DATA_TYPE=half * * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) @@ -1175,7 +1212,7 @@ inline half4 convolution3x3_f16( * @param[in] weights_stride_z Stride of the weights tensor in Z dimension (in bytes) * @param[in] weights_step_z weights_stride_z * number of elements along Y processed per workitem(in bytes) * @param[in] weights_offset_first_element_in_bytes The offset of the first element in the biases vector - * @param[in] biases_ptr (Optional) Pointer to the biases vector. Supported data types: F16/F32 + * @param[in] biases_ptr (Optional) Pointer to the biases vector. Supported data types: F16 * @param[in] biases_stride_x (Optional) Stride of the biases vector in X dimension (in bytes) * @param[in] biases_step_x (Optional) biases_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] biases_offset_first_element_in_bytes (Optional) The offset of the first element in the biases vector @@ -1216,7 +1253,7 @@ __kernel void depthwise_convolution_3x3_f16( pixels += (half4)(*((__global half *)(biases.ptr + channel * biases_stride_x))); #endif //defined(HAS_BIAS) - vstore4(pixels, 0, (__global half *)dst.ptr); + vstore4(ACTIVATION_FUNC(pixels), 0, (__global half *)dst.ptr); } #endif // defined(DEPTH_MULTIPLIER) #endif // defined(CONV_STRIDE_X) @@ -1224,6 +1261,12 @@ __kernel void depthwise_convolution_3x3_f16( /** This OpenCL kernel is optimized for Bifrost architectures and computes the 16bit floating point depthwise convolution 3x3 * when both stride_x and stride_y are equal to 1 * + * @note It is possible to select the activation function to apply using -DFUSED_ACTIVATION e.g. -DFUSED_ACTIVATION=relu + * @note If activation function is enabled, the data type must be passed at compile time using -DDATA_TYPE e.g. -DDATA_TYPE=half. Supported data types: half. + * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively + * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size + * @note Select data type should be given too with -DSELECT_DATA_TYPE e.g -DSELECT_DATA_TYPE=half + * * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) @@ -1333,15 +1376,21 @@ __kernel void depthwise_convolution_3x3_stridex1_stridey1_bifrost_f16( pixels3 += (half4)bias; #endif /* defined(HAS_BIAS) */ - vstore4(pixels0, 0, (__global half *)(dst.ptr + 0 * dst_stride_y)); - vstore4(pixels1, 0, (__global half *)(dst.ptr + 1 * dst_stride_y)); - vstore4(pixels2, 0, (__global half *)(dst.ptr + 2 * dst_stride_y)); - vstore4(pixels3, 0, (__global half *)(dst.ptr + 3 * dst_stride_y)); + vstore4(ACTIVATION_FUNC(pixels0), 0, (__global half *)(dst.ptr + 0 * dst_stride_y)); + vstore4(ACTIVATION_FUNC(pixels1), 0, (__global half *)(dst.ptr + 1 * dst_stride_y)); + vstore4(ACTIVATION_FUNC(pixels2), 0, (__global half *)(dst.ptr + 2 * dst_stride_y)); + vstore4(ACTIVATION_FUNC(pixels3), 0, (__global half *)(dst.ptr + 3 * dst_stride_y)); } /** This OpenCL kernel is optimized for Bifrost architectures and computes 16bit floating point the depthwise convolution 3x3 * when both stride_x and stride_y are equal to 2 * + * @note It is possible to select the activation function to apply using -DFUSED_ACTIVATION e.g. -DFUSED_ACTIVATION=relu + * @note If activation function is enabled, the data type must be passed at compile time using -DDATA_TYPE e.g. -DDATA_TYPE=half. Supported data types: half. + * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively + * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size + * @note Select data type should be given too with -DSELECT_DATA_TYPE e.g -DSELECT_DATA_TYPE=half + * * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) @@ -1440,10 +1489,10 @@ __kernel void depthwise_convolution_3x3_stridex2_stridey2_bifrost_f16( pixels1 += (half4)bias; #endif /* defined(HAS_BIAS) */ - vstore4(pixels0, 0, (__global half *)(dst.ptr + 0 * dst_stride_y)); - vstore4(pixels1, 0, (__global half *)(dst.ptr + 1 * dst_stride_y)); + vstore4(ACTIVATION_FUNC(pixels0), 0, (__global half *)(dst.ptr + 0 * dst_stride_y)); + vstore4(ACTIVATION_FUNC(pixels1), 0, (__global half *)(dst.ptr + 1 * dst_stride_y)); } -#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(DEPTH_MULTIPLIER) && defined(DST_CHANNELS) +#endif // defined(ARM_COMPUTE_OPENCL_FP16_ENABLED) && defined(DEPTH_MULTIPLIER) && defined(DST_CHANNELS) && defined(IS_F16) #if defined(VEC_SIZE) && defined(SRC_DIM_2) && defined(CONV_PAD_TOP) && defined(CONV_PAD_LEFT) && defined(DATA_TYPE) @@ -1463,8 +1512,12 @@ __kernel void depthwise_convolution_3x3_stridex2_stridey2_bifrost_f16( * @note The convolution pad top must be passed at compile time using -DCONV_PAD_LEFT (e.g. -DCONV_PAD_LEFT=1) * @note The convolution stride along the width must be passed at compile time using -DCONV_STRIDE_X (e.g. -DCONV_STRIDE_Y=X) * @note The convolution stride along the height must be passed at compile time using -DCONV_STRIDE_Y (e.g. -DCONV_STRIDE_Y=1) + * @note It is possible to select the activation function to apply using -DFUSED_ACTIVATION e.g. -DFUSED_ACTIVATION=relu + * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively + * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size + * @note Select data type should be given too with -DSELECT_DATA_TYPE e.g -DSELECT_DATA_TYPE=half * - * @param[in] src_ptr Pointer to the source tensor. Supported data types: FP32 + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) @@ -1484,7 +1537,7 @@ __kernel void depthwise_convolution_3x3_stridex2_stridey2_bifrost_f16( * @param[in] dst_stride_w Stride of the destination tensor in W dimension (in bytes) * @param[in] dst_step_w dst_stride_w * number of elements along W processed per workitem(in bytes) * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor - * @param[in] weights_ptr Pointer to the weights tensor. Supported data types: QASYMM8 + * @param[in] weights_ptr Pointer to the weights tensor. Supported data types: F16/F32 * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) @@ -1599,21 +1652,26 @@ __kernel void depthwise_convolution_3x3_nhwc( #endif /* defined(DST_DEPTH) */ VSTORE(VEC_SIZE) - (acc, 0, (__global DATA_TYPE *)(dst_addr)); + (ACTIVATION_FUNC(acc), 0, (__global DATA_TYPE *)(dst_addr)); } #endif // defined(CONV_STRIDE_X) && defined(CONV_STRIDE_Y) #if defined(NUM_ROWS_PROCESSED) && defined(NUM_PLANES_PROCESSED) /** This function computes the depthwise convolution for NHWC data layout when the stride along the width and height is 1. * + * @note Datatype should be given as a preprocessor argument using -DDATA_TYPE=type. e.g. -DDATA_TYPE=float * @note The number of elements read per thread must be passed at compile time using -DVEC_SIZE (e.g. -DVEC_SIZE=2) * @note Dimension two of the input tensor (height for NHWC data layout) must be passed at compile time using -DSRC_DIM2 (e.g. -DSRC_DIM_2=112) * @note The number of rows processed per thread must be passed at compile time using -DNUM_ROWS_PROCESSED (i.e. -DNUM_ROWS_PROCESSED=2) * @note The number of planes processed per thread must be passed at compile time using -DNUM_PLANES_PROCESSED (i.e. -DNUM_PLANES_PROCESSED=2) * @note The convolution pad top must be passed at compile time using -DCONV_PAD_TOP (e.g. -DCONV_PAD_TOP=1) * @note The convolution pad top must be passed at compile time using -DCONV_PAD_LEFT (e.g. -DCONV_PAD_LEFT=1) + * @note It is possible to select the activation function to apply using -DFUSED_ACTIVATION e.g. -DFUSED_ACTIVATION=relu + * @note A, B variables required by some activation functions are set using -DA_VAL= and -DB_VAL= respectively + * @note Vector size should be given as a preprocessor argument using -DVEC_SIZE=size + * @note Select data type should be given too with -DSELECT_DATA_TYPE e.g -DSELECT_DATA_TYPE=half * - * @param[in] src_ptr Pointer to the source tensor. Supported data types: FP32 + * @param[in] src_ptr Pointer to the source tensor. Supported data types: F16/F32 * @param[in] src_stride_x Stride of the source tensor in X dimension (in bytes) * @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] src_stride_y Stride of the source tensor in Y dimension (in bytes) @@ -1633,7 +1691,7 @@ __kernel void depthwise_convolution_3x3_nhwc( * @param[in] dst_stride_w Stride of the destination tensor in W dimension (in bytes) * @param[in] dst_step_w dst_stride_w * number of elements along W processed per workitem(in bytes) * @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination tensor - * @param[in] weights_ptr Pointer to the weights tensor. Supported data types: QASYMM8 + * @param[in] weights_ptr Pointer to the weights tensor. Supported data types: F16/F32 * @param[in] weights_stride_x Stride of the weights tensor in X dimension (in bytes) * @param[in] weights_step_x weights_stride_x * number of elements along X processed per workitem(in bytes) * @param[in] weights_stride_y Stride of the weights tensor in Y dimension (in bytes) @@ -1799,18 +1857,18 @@ __kernel void depthwise_convolution_3x3_nhwc_stride1( #endif /* defined(DST_DEPTH) */ VSTORE(VEC_SIZE) - (acc0, 0, (__global DATA_TYPE *)(dst_addr + 0 * dst_stride_y)); + (ACTIVATION_FUNC(acc0), 0, (__global DATA_TYPE *)(dst_addr + 0 * dst_stride_y)); VSTORE(VEC_SIZE) - (acc1, 0, (__global DATA_TYPE *)(dst_addr + 1 * dst_stride_y)); + (ACTIVATION_FUNC(acc1), 0, (__global DATA_TYPE *)(dst_addr + 1 * dst_stride_y)); #if((DST_DIM_2 % NUM_PLANES_PROCESSED) != 0) if((z * NUM_PLANES_PROCESSED + 1) < DST_DIM_2) #endif // ((DST_DIM_2 % NUM_PLANES_PROCESSED) != 0) { VSTORE(VEC_SIZE) - (acc2, 0, (__global DATA_TYPE *)(dst_addr + 0 * dst_stride_y + 1 * dst_stride_z)); + (ACTIVATION_FUNC(acc2), 0, (__global DATA_TYPE *)(dst_addr + 0 * dst_stride_y + 1 * dst_stride_z)); VSTORE(VEC_SIZE) - (acc3, 0, (__global DATA_TYPE *)(dst_addr + 1 * dst_stride_y + 1 * dst_stride_z)); + (ACTIVATION_FUNC(acc3), 0, (__global DATA_TYPE *)(dst_addr + 1 * dst_stride_y + 1 * dst_stride_z)); } } diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp index ec27e419c4..02d8c6d9c2 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp @@ -47,10 +47,10 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, { ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::QASYMM8, DataType::F16, DataType::F32); - ARM_COMPUTE_RETURN_ERROR_ON_MSG(act_info.enabled() && ((input->data_type() != DataType::QASYMM8) || ((act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU) - && (act_info.activation() != ActivationLayerInfo::ActivationFunction::BOUNDED_RELU) - && (act_info.activation() != ActivationLayerInfo::ActivationFunction::RELU) - && (act_info.activation() != ActivationLayerInfo::ActivationFunction::LOGISTIC))), + ARM_COMPUTE_RETURN_ERROR_ON_MSG((act_info.enabled()) && (input->data_type() == DataType::QASYMM8) && (act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU) + && (act_info.activation() != ActivationLayerInfo::ActivationFunction::BOUNDED_RELU) + && (act_info.activation() != ActivationLayerInfo::ActivationFunction::RELU) + && (act_info.activation() != ActivationLayerInfo::ActivationFunction::LOGISTIC), "For QASYMM8 only logistic, relu, lower bounded relu and lower-upper bounded relu are supported"); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); ARM_COMPUTE_RETURN_ERROR_ON(weights->dimension(0) != 3 || weights->dimension(1) != 3); @@ -241,6 +241,7 @@ void CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(const ICLTensor *input, // Set build options CLBuildOptions build_opts; + build_opts.add_option_if(act_info.enabled(), "-DFUSED_ACTIVATION=" + lower_string(string_from_activation_func(act_info.activation()))); build_opts.add_option("-DDST_CHANNELS=" + support::cpp11::to_string(_output->info()->tensor_shape().z())); build_opts.add_option("-DDEPTH_MULTIPLIER=" + support::cpp11::to_string(depth_multiplier)); build_opts.add_option("-DCONV_STRIDE_X=" + support::cpp11::to_string(_conv_stride_x)); @@ -269,7 +270,6 @@ void CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(const ICLTensor *input, const int b_val = output->info()->quantization_info().quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP); const int o1 = output->info()->quantization_info().offset; - build_opts.add_option("-DFUSED_ACTIVATION=" + lower_string(string_from_activation_func(act_info.activation()))); build_opts.add_option("-DA_VAL=" + support::cpp11::to_string(a_val)); build_opts.add_option("-DB_VAL=" + support::cpp11::to_string(b_val)); build_opts.add_option("-DCONST_0=" + support::cpp11::to_string(o1)); @@ -279,6 +279,18 @@ void CLDepthwiseConvolutionLayer3x3NCHWKernel::configure(const ICLTensor *input, build_opts.add_option("-DO1_VAL=" + support::cpp11::to_string(o1)); } } + else + { + build_opts.add_option_if(act_info.enabled(), "-DA_VAL=" + float_to_string_with_full_precision(act_info.a())); + build_opts.add_option_if(act_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(act_info.b())); + build_opts.add_option_if(act_info.enabled(), "-DSELECT_DATA_TYPE=" + get_cl_select_type_from_data_type(input->info()->data_type())); + build_opts.add_option_if(act_info.enabled(), "-DDATA_TYPE=" + get_cl_type_from_data_type(input->info()->data_type())); + build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(win_config.second.x().step())); + } + + build_opts.add_option_if(input->info()->data_type() == DataType::F16, "-DIS_F16"); + build_opts.add_option_if(input->info()->data_type() == DataType::F32, "-DIS_F32"); + _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); // Set config_id for enabling LWS tuning diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp index 86d186b95e..c31825cc2c 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp @@ -46,11 +46,11 @@ Status validate_arguments(const ITensorInfo *input, const ITensorInfo *weights, { ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(input); ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input, 1, DataType::F16, DataType::F32, DataType::QASYMM8); - ARM_COMPUTE_RETURN_ERROR_ON_MSG((act_info.enabled()) && ((input->data_type() != DataType::QASYMM8) || ((act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU) - && (act_info.activation() != ActivationLayerInfo::ActivationFunction::BOUNDED_RELU) - && (act_info.activation() != ActivationLayerInfo::ActivationFunction::RELU) - && (act_info.activation() != ActivationLayerInfo::ActivationFunction::LOGISTIC))), - "For QASYMM8 only logistic, relu, lower bounded relu and lower-upper bounded relu are supported"); //COMPMID-1317 add fused activation for F32 + ARM_COMPUTE_RETURN_ERROR_ON_MSG((act_info.enabled()) && (input->data_type() == DataType::QASYMM8) && (act_info.activation() != ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU) + && (act_info.activation() != ActivationLayerInfo::ActivationFunction::BOUNDED_RELU) + && (act_info.activation() != ActivationLayerInfo::ActivationFunction::RELU) + && (act_info.activation() != ActivationLayerInfo::ActivationFunction::LOGISTIC), + "For QASYMM8 only logistic, relu, lower bounded relu and lower-upper bounded relu are supported"); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input, weights); ARM_COMPUTE_RETURN_ERROR_ON(depth_multiplier > 1); // COMPMID-1071 Add depth multiplier support for NHWC @@ -202,6 +202,7 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input, const unsigned int num_elems_accessed_per_iteration = is_qasymm ? 4 : (8 / input->info()->element_size()); CLBuildOptions build_opts; + build_opts.add_option_if(act_info.enabled(), "-DFUSED_ACTIVATION=" + lower_string(string_from_activation_func(act_info.activation()))); build_opts.add_option_if(_biases != nullptr, "-DHAS_BIAS"); build_opts.add_option("-DVEC_SIZE=" + support::cpp11::to_string(num_elems_accessed_per_iteration)); build_opts.add_option("-DSRC_DIM_2=" + support::cpp11::to_string(_input->info()->dimension(2))); @@ -231,7 +232,6 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input, const int b_val = output->info()->quantization_info().quantize(act_info.b(), RoundingPolicy::TO_NEAREST_UP); const int o1 = output->info()->quantization_info().offset; - build_opts.add_option("-DFUSED_ACTIVATION=" + lower_string(string_from_activation_func(act_info.activation()))); build_opts.add_option("-DA_VAL=" + support::cpp11::to_string(a_val)); build_opts.add_option("-DB_VAL=" + support::cpp11::to_string(b_val)); build_opts.add_option("-DCONST_0=" + support::cpp11::to_string(o1)); @@ -243,6 +243,9 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input, } else { + build_opts.add_option_if(act_info.enabled(), "-DA_VAL=" + float_to_string_with_full_precision(act_info.a())); + build_opts.add_option_if(act_info.enabled(), "-DB_VAL=" + float_to_string_with_full_precision(act_info.b())); + build_opts.add_option_if(act_info.enabled(), "-DSELECT_DATA_TYPE=" + get_cl_select_type_from_data_type(input->info()->data_type())); build_opts.add_option("-DDATA_TYPE=" + get_cl_type_from_data_type(_input->info()->data_type())); } @@ -275,6 +278,9 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input, kernel_name += (is_stride_1_dilation_1 ? "_stride1" : ""); } + build_opts.add_option_if(input->info()->data_type() == DataType::F16, "-DIS_F16"); + build_opts.add_option_if(input->info()->data_type() == DataType::F32, "-DIS_F32"); + ICLKernel::configure_internal(win_config.second); _kernel = static_cast(CLKernelLibrary::get().create_kernel(kernel_name, build_opts.options())); diff --git a/tests/SimpleTensor.h b/tests/SimpleTensor.h index dd4a8bee2c..f0e9b15021 100644 --- a/tests/SimpleTensor.h +++ b/tests/SimpleTensor.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -280,7 +280,7 @@ SimpleTensor::SimpleTensor(TensorShape shape, DataType data_type, int num_cha _quantization_info(quantization_info), _data_layout(data_layout) { - _buffer = support::cpp14::make_unique(num_elements() * this->num_channels()); + _buffer = support::cpp14::make_unique(this->_shape.total_size() * _num_channels); } template @@ -293,8 +293,8 @@ SimpleTensor::SimpleTensor(const SimpleTensor &tensor) _quantization_info(tensor.quantization_info()), _data_layout(tensor.data_layout()) { - _buffer = support::cpp14::make_unique(tensor.num_elements() * num_channels()); - std::copy_n(tensor.data(), num_elements() * num_channels(), _buffer.get()); + _buffer = support::cpp14::make_unique(tensor.num_elements() * _num_channels); + std::copy_n(tensor.data(), this->_shape.total_size() * _num_channels, _buffer.get()); } template diff --git a/tests/validation/CL/DepthwiseConvolutionLayer.cpp b/tests/validation/CL/DepthwiseConvolutionLayer.cpp index 94f64e19b4..274a0f523a 100644 --- a/tests/validation/CL/DepthwiseConvolutionLayer.cpp +++ b/tests/validation/CL/DepthwiseConvolutionLayer.cpp @@ -49,6 +49,13 @@ constexpr AbsoluteTolerance tolerance_qasymm8(0); /**< constexpr float tolerance_num = 0.05f; /**< Tolerance number */ const auto depth_multipliers = framework::dataset::make("DepthMultiplier", { 1, 2, 3 }); + +//Activation Functions +const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo", +{ + ActivationLayerInfo(), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) +}); } // namespace TEST_SUITE(CL) @@ -279,37 +286,41 @@ TEST_SUITE(FP16) TEST_SUITE(W3x3) TEST_SUITE(NCHW) FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, - combine(combine(combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), - datasets::SmallDepthwiseConvolutionLayerDataset3x3NCHW()), - depth_multipliers), - framework::dataset::make("DataType", - DataType::F16)), - framework::dataset::make("DataLayout", DataLayout::NCHW))) + combine(combine(combine(combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), + datasets::SmallDepthwiseConvolutionLayerDataset3x3NCHW()), + depth_multipliers), + framework::dataset::make("DataType", + DataType::F16)), + framework::dataset::make("DataLayout", DataLayout::NCHW)), + ActivationFunctionsDataset)) { validate(CLAccessor(_target), _reference, tolerance_f16); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), +FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), depth_multipliers), framework::dataset::make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", DataLayout::NCHW))) + framework::dataset::make("DataLayout", DataLayout::NCHW)), + ActivationFunctionsDataset)) { validate(CLAccessor(_target), _reference, tolerance_f16); } TEST_SUITE(Dilation) -FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), +FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers), framework::dataset::make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW }))) + framework::dataset::make("DataLayout", { DataLayout::NCHW })), + ActivationFunctionsDataset)) { validate(CLAccessor(_target), _reference, tolerance_f16); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), +FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers), framework::dataset::make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW }))) + framework::dataset::make("DataLayout", { DataLayout::NCHW })), + ActivationFunctionsDataset)) { validate(CLAccessor(_target), _reference, tolerance_f16); } @@ -318,36 +329,40 @@ TEST_SUITE_END() // NCHW TEST_SUITE(NHWC) FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, - combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), - depth_multipliers), - framework::dataset::make("DataType", - DataType::F16)), - framework::dataset::make("DataLayout", DataLayout::NHWC))) + combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), + depth_multipliers), + framework::dataset::make("DataType", + DataType::F16)), + framework::dataset::make("DataLayout", DataLayout::NHWC)), + ActivationFunctionsDataset)) { validate(CLAccessor(_target), _reference, tolerance_f16); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), +FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), depth_multipliers), framework::dataset::make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", DataLayout::NHWC))) + framework::dataset::make("DataLayout", DataLayout::NHWC)), + ActivationFunctionsDataset)) { validate(CLAccessor(_target), _reference, tolerance_f16); } TEST_SUITE(Dilation) -FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), +FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers), framework::dataset::make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NHWC }))) + framework::dataset::make("DataLayout", { DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(CLAccessor(_target), _reference, tolerance_f16); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), +FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers), framework::dataset::make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NHWC }))) + framework::dataset::make("DataLayout", { DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(CLAccessor(_target), _reference, tolerance_f16); } @@ -356,36 +371,41 @@ TEST_SUITE_END() // NHWC TEST_SUITE_END() // W3x3 TEST_SUITE(Generic) -FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), depth_multipliers), +FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), + depth_multipliers), framework::dataset::make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), +FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), depth_multipliers), framework::dataset::make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); } TEST_SUITE(Dilation) -FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), +FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), depth_multipliers), framework::dataset::make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(), +FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(), depth_multipliers), framework::dataset::make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num); } @@ -397,37 +417,42 @@ TEST_SUITE(FP32) TEST_SUITE(W3x3) TEST_SUITE(NCHW) FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, - combine(combine(combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), - datasets::SmallDepthwiseConvolutionLayerDataset3x3NCHW()), - depth_multipliers), - framework::dataset::make("DataType", - DataType::F32)), - framework::dataset::make("DataLayout", DataLayout::NCHW))) + combine(combine(combine(combine(framework::dataset::concat(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), + datasets::SmallDepthwiseConvolutionLayerDataset3x3NCHW()), + depth_multipliers), + framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", DataLayout::NCHW)), + ActivationFunctionsDataset)) { validate(CLAccessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), +FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), depth_multipliers), framework::dataset::make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", DataLayout::NCHW))) + framework::dataset::make("DataLayout", DataLayout::NCHW)), + ActivationFunctionsDataset)) { validate(CLAccessor(_target), _reference, tolerance_f32); } TEST_SUITE(Dilation) FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, - combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers), - framework::dataset::make("DataType", - DataType::F32)), - framework::dataset::make("DataLayout", DataLayout::NCHW))) + combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers), + framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", DataLayout::NCHW)), + ActivationFunctionsDataset)) { validate(CLAccessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), - depth_multipliers), - framework::dataset::make("DataType", - DataType::F32)), - framework::dataset::make("DataLayout", DataLayout::NCHW))) +FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), + depth_multipliers), + framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", DataLayout::NCHW)), + ActivationFunctionsDataset)) { validate(CLAccessor(_target), _reference, tolerance_f32); } @@ -436,38 +461,43 @@ TEST_SUITE_END() // Dilation TEST_SUITE_END() // NCHW TEST_SUITE(NHWC) FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, - combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), - depth_multipliers), - framework::dataset::make("DataType", - DataType::F32)), - framework::dataset::make("DataLayout", DataLayout::NHWC))) + combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), + depth_multipliers), + framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", DataLayout::NHWC)), + ActivationFunctionsDataset)) { validate(CLAccessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), +FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), depth_multipliers), framework::dataset::make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", DataLayout::NHWC))) + framework::dataset::make("DataLayout", DataLayout::NHWC)), + ActivationFunctionsDataset)) { validate(CLAccessor(_target), _reference, tolerance_f32); } TEST_SUITE(Dilation) FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, - combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), - depth_multipliers), - framework::dataset::make("DataType", - DataType::F32)), - framework::dataset::make("DataLayout", DataLayout::NHWC))) + combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), + depth_multipliers), + framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", DataLayout::NHWC)), + ActivationFunctionsDataset)) { validate(CLAccessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), - depth_multipliers), - framework::dataset::make("DataType", - DataType::F32)), - framework::dataset::make("DataLayout", DataLayout::NHWC))) +FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), + depth_multipliers), + framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", DataLayout::NHWC)), + ActivationFunctionsDataset)) { validate(CLAccessor(_target), _reference, tolerance_f32); } @@ -476,36 +506,42 @@ TEST_SUITE_END() // NHWC TEST_SUITE_END() // W3x3 TEST_SUITE(Generic) -FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), depth_multipliers), +FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), + depth_multipliers), framework::dataset::make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(CLAccessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), +FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), depth_multipliers), framework::dataset::make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(CLAccessor(_target), _reference, tolerance_f32); } TEST_SUITE(Dilation) -FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), +FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), depth_multipliers), framework::dataset::make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(CLAccessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), - depth_multipliers), - framework::dataset::make("DataType", - DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) +FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), + depth_multipliers), + framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(CLAccessor(_target), _reference, tolerance_f32); } @@ -521,39 +557,43 @@ TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) TEST_SUITE(Generic) FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), - depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), + depth_multipliers), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(CLAccessor(_target), _reference, tolerance_qasymm8); } FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), - depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), + depth_multipliers), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(CLAccessor(_target), _reference, tolerance_qasymm8); } TEST_SUITE(Dilation) FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), - depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), + depth_multipliers), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(CLAccessor(_target), _reference, tolerance_qasymm8); } FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(), - depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(), + depth_multipliers), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(CLAccessor(_target), _reference, tolerance_qasymm8); } @@ -561,39 +601,43 @@ TEST_SUITE_END() // Dilation TEST_SUITE_END() // Generic TEST_SUITE(W3x3) FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), - depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), + depth_multipliers), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(CLAccessor(_target), _reference, tolerance_qasymm8); } FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), - depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), + depth_multipliers), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(CLAccessor(_target), _reference, tolerance_qasymm8); } TEST_SUITE(Dilation) FIXTURE_DATA_TEST_CASE(RunSmall, CLDepthwiseConvolutionLayerQuantizedFixture, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), - depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), + depth_multipliers), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(CLAccessor(_target), _reference, tolerance_qasymm8); } FIXTURE_DATA_TEST_CASE(RunLarge, CLDepthwiseConvolutionLayerQuantizedFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), - depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), + depth_multipliers), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(CLAccessor(_target), _reference, tolerance_qasymm8); } diff --git a/tests/validation/GLES_COMPUTE/DepthwiseConvolutionLayer.cpp b/tests/validation/GLES_COMPUTE/DepthwiseConvolutionLayer.cpp index 22b1e08d5b..c31cae3561 100644 --- a/tests/validation/GLES_COMPUTE/DepthwiseConvolutionLayer.cpp +++ b/tests/validation/GLES_COMPUTE/DepthwiseConvolutionLayer.cpp @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2018 ARM Limited. + * Copyright (c) 2017-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -18,7 +18,7 @@ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONCLCTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ #include "arm_compute/core/Types.h" @@ -46,6 +46,10 @@ RelativeTolerance tolerance_fp16(half(0.2)); /**< Tolerance value for comp constexpr float tolerance_num = 0.07f; /**< Tolerance number */ const auto depth_multipliers = framework::dataset::make("DepthMultiplier", { 1, 2, 3 }); + +//Activation Functions +const auto ActivationFunctionsEmptyDataset = framework::dataset::make("ActivationInfo", +{ ActivationLayerInfo() }); } // namespace TEST_SUITE(GC) @@ -57,19 +61,21 @@ using GCDepthwiseConvolutionLayerFixture3x3 = DepthwiseConvolutionLayerValidatio TEST_SUITE(Float) TEST_SUITE(FP16) TEST_SUITE(W3x3) -FIXTURE_DATA_TEST_CASE(RunSmall, GCDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), +FIXTURE_DATA_TEST_CASE(RunSmall, GCDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), depth_multipliers), framework::dataset::make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", DataLayout::NCHW))) + framework::dataset::make("DataLayout", DataLayout::NCHW)), + ActivationFunctionsEmptyDataset)) { validate(GCAccessor(_target), _reference, tolerance_fp16, tolerance_num); } -FIXTURE_DATA_TEST_CASE(RunLarge, GCDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), +FIXTURE_DATA_TEST_CASE(RunLarge, GCDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), depth_multipliers), framework::dataset::make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", DataLayout::NCHW))) + framework::dataset::make("DataLayout", DataLayout::NCHW)), + ActivationFunctionsEmptyDataset)) { validate(GCAccessor(_target), _reference, tolerance_fp16, tolerance_num); } diff --git a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp index b61393f9ea..8eefec37d5 100644 --- a/tests/validation/NEON/DepthwiseConvolutionLayer.cpp +++ b/tests/validation/NEON/DepthwiseConvolutionLayer.cpp @@ -54,10 +54,17 @@ constexpr float tolerance_num = 0.05f; /**< #endif // __ARM_FEATURE_FP16_VECTOR_ARITHMETIC const auto depth_multipliers = framework::dataset::make("DepthMultiplier", { 1, 2, 3 }); + +//Activation Functions +const auto ActivationFunctionsDataset = framework::dataset::make("ActivationInfo", +{ + ActivationLayerInfo(), + ActivationLayerInfo(ActivationLayerInfo::ActivationFunction::RELU) +}); } // namespace TEST_SUITE(NEON) -TEST_SUITE(DepthwiseConvLayer) +TEST_SUITE(DepthwiseConvolutionLayer) // *INDENT-OFF* // clang-format off @@ -246,37 +253,41 @@ TEST_SUITE(F32) TEST_SUITE(Generic) template using NEDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFixture; -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), depth_multipliers), framework::dataset::make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), depth_multipliers), framework::dataset::make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f32); } TEST_SUITE(Dilation) -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), depth_multipliers), framework::dataset::make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(), +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(), depth_multipliers), framework::dataset::make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f32); } @@ -286,36 +297,41 @@ TEST_SUITE_END() // Generic TEST_SUITE(W3x3) template using NEDepthwiseConvolutionLayerFixture3x3 = DepthwiseConvolutionLayerValidationFixture; -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), depth_multipliers), framework::dataset::make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), depth_multipliers), framework::dataset::make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f32); } TEST_SUITE(Dilation) -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers), framework::dataset::make("DataType", DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), - depth_multipliers), - framework::dataset::make("DataType", - DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), + depth_multipliers), + framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f32); } @@ -323,20 +339,22 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3, f TEST_SUITE_END() // Dilation FIXTURE_DATA_TEST_CASE(RunOptimizedSmall, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), - framework::dataset::make("DepthMultiplier", 1)), - framework::dataset::make("DataType", - DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), + framework::dataset::make("DepthMultiplier", 1)), + framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f32); } FIXTURE_DATA_TEST_CASE(RunOptimizedLarge, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(), - framework::dataset::make("DepthMultiplier", 1)), - framework::dataset::make("DataType", - DataType::F32)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(), + framework::dataset::make("DepthMultiplier", 1)), + framework::dataset::make("DataType", + DataType::F32)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f32); } @@ -348,37 +366,41 @@ TEST_SUITE(F16) TEST_SUITE(Generic) template using NEDepthwiseConvolutionLayerFixture = DepthwiseConvolutionLayerValidationFixture; -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), depth_multipliers), framework::dataset::make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f16, tolerance_num); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset(), depth_multipliers), framework::dataset::make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f16, tolerance_num); } TEST_SUITE(Dilation) -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), depth_multipliers), framework::dataset::make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f16, tolerance_num); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(), +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(), depth_multipliers), framework::dataset::make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f16, tolerance_num); } @@ -388,38 +410,43 @@ TEST_SUITE_END() // Generic TEST_SUITE(W3x3) template using NEDepthwiseConvolutionLayerFixture3x3 = DepthwiseConvolutionLayerValidationFixture; -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), depth_multipliers), framework::dataset::make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f16); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::NIGHTLY, combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), depth_multipliers), framework::dataset::make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f16); } TEST_SUITE(Dilation) -FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::ALL, combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), +FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::ALL, combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers), framework::dataset::make("DataType", DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f16); } -FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::NIGHTLY, combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), - depth_multipliers), - framework::dataset::make("DataType", - DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) +FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::NIGHTLY, + combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), + depth_multipliers), + framework::dataset::make("DataType", + DataType::F16)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f16); } @@ -427,20 +454,22 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerFixture3x3, fr TEST_SUITE_END() // Dilation FIXTURE_DATA_TEST_CASE(RunOptimizedSmall, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), - framework::dataset::make("DepthMultiplier", 1)), - framework::dataset::make("DataType", - DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), + framework::dataset::make("DepthMultiplier", 1)), + framework::dataset::make("DataType", + DataType::F16)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f16); } FIXTURE_DATA_TEST_CASE(RunOptimizedLarge, NEDepthwiseConvolutionLayerFixture3x3, framework::DatasetMode::NIGHTLY, - combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(), - framework::dataset::make("DepthMultiplier", 1)), - framework::dataset::make("DataType", - DataType::F16)), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(), + framework::dataset::make("DepthMultiplier", 1)), + framework::dataset::make("DataType", + DataType::F16)), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_f16); } @@ -459,31 +488,34 @@ TEST_SUITE(Quantized) TEST_SUITE(QASYMM8) TEST_SUITE(Generic) FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), - depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset(), + depth_multipliers), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_qasymm8); } TEST_SUITE(Dilation) FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), - depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset(), + depth_multipliers), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_qasymm8); } FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(), - depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset(), + depth_multipliers), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_qasymm8); } @@ -491,39 +523,43 @@ TEST_SUITE_END() //Dilation TEST_SUITE_END() // Generic TEST_SUITE(W3x3) FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture3x3, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(combine(combine(combine(combine(datasets::SmallDepthwiseConvolutionLayerDataset3x3(), depth_multipliers), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_qasymm8); } FIXTURE_DATA_TEST_CASE(RunOptimizedSmall, NEDepthwiseConvolutionLayerQuantizedFixture3x3, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), - framework::dataset::make("DepthMultiplier", 1)), - framework::dataset::make("DataType", - DataType::QASYMM8)), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(combine(combine(combine(combine(datasets::SmallOptimizedDepthwiseConvolutionLayerDataset3x3(), + framework::dataset::make("DepthMultiplier", 1)), + framework::dataset::make("DataType", + DataType::QASYMM8)), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_qasymm8); } FIXTURE_DATA_TEST_CASE(RunOptimizedLarge, NEDepthwiseConvolutionLayerQuantizedFixture3x3, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(), - framework::dataset::make("DepthMultiplier", 1)), - framework::dataset::make("DataType", - DataType::QASYMM8)), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(combine(combine(combine(combine(datasets::LargeOptimizedDepthwiseConvolutionLayerDataset3x3(), + framework::dataset::make("DepthMultiplier", 1)), + framework::dataset::make("DataType", + DataType::QASYMM8)), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_qasymm8); } FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture3x3, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), - depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(combine(combine(combine(combine(datasets::LargeDepthwiseConvolutionLayerDataset3x3(), + depth_multipliers), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_qasymm8); } @@ -531,19 +567,21 @@ FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture3x3< TEST_SUITE(Dilation) FIXTURE_DATA_TEST_CASE(RunSmall, NEDepthwiseConvolutionLayerQuantizedFixture3x3, framework::DatasetMode::PRECOMMIT, - combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(combine(combine(combine(combine(datasets::SmallDepthwiseDilatedConvolutionLayerDataset3x3(), depth_multipliers), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_qasymm8); } FIXTURE_DATA_TEST_CASE(RunLarge, NEDepthwiseConvolutionLayerQuantizedFixture3x3, framework::DatasetMode::NIGHTLY, - combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), - depth_multipliers), - framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), - framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC }))) + combine(combine(combine(combine(combine(datasets::LargeDepthwiseDilatedConvolutionLayerDataset3x3(), + depth_multipliers), + framework::dataset::make("DataType", DataType::QASYMM8)), + framework::dataset::make("QuantizationInfo", { QuantizationInfo(0.5f, 10) })), + framework::dataset::make("DataLayout", { DataLayout::NCHW, DataLayout::NHWC })), + ActivationFunctionsDataset)) { validate(Accessor(_target), _reference, tolerance_qasymm8); } diff --git a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h index dd8bf232be..9e6dd4bd28 100644 --- a/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h +++ b/tests/validation/fixtures/DepthwiseConvolutionLayerFixture.h @@ -33,6 +33,7 @@ #include "tests/framework/Asserts.h" #include "tests/framework/Fixture.h" #include "tests/validation/Helpers.h" +#include "tests/validation/reference/ActivationLayer.h" #include "tests/validation/reference/DepthwiseConvolutionLayer.h" #include "utils/Utils.h" @@ -56,7 +57,7 @@ public: public: template void setup(TensorShape in_shape, Size2D kernel_size, PadStrideInfo pad_stride_info, Size2D dilation, unsigned int depth_multiplier, DataType data_type, QuantizationInfo quantization_info, - DataLayout data_layout) + DataLayout data_layout, ActivationLayerInfo act_info) { _quantization_info = quantization_info; _data_type = data_type; @@ -64,15 +65,15 @@ public: TensorShape weights_shape(kernel_size.width, kernel_size.height); - const TensorInfo in_info(in_shape, 1, data_type); - const TensorInfo we_info(weights_shape, 1, data_type); - const TensorShape out_shape = compute_depthwise_convolution_shape(in_info, we_info, pad_stride_info, depth_multiplier, dilation); + const TensorInfo in_info(in_shape, 1, data_type); + const TensorInfo we_info(weights_shape, 1, data_type); + const TensorShape out_shape = compute_depthwise_convolution_shape(in_info, we_info, pad_stride_info, depth_multiplier, dilation); weights_shape.set(2, out_shape.z()); const TensorShape biases_shape(weights_shape[2]); - _target = compute_target(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, dilation, depth_multiplier, data_type, bias_data_type, quantization_info, data_layout); - _reference = compute_reference(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, dilation, depth_multiplier, data_type, bias_data_type, quantization_info); + _target = compute_target(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, dilation, depth_multiplier, data_type, bias_data_type, quantization_info, data_layout, act_info); + _reference = compute_reference(in_shape, weights_shape, biases_shape, out_shape, pad_stride_info, dilation, depth_multiplier, data_type, bias_data_type, quantization_info, act_info); } protected: @@ -107,7 +108,7 @@ protected: TensorType compute_target(TensorShape input_shape, TensorShape weights_shape, TensorShape biases_shape, TensorShape output_shape, PadStrideInfo &pad_stride_info, Size2D dilation, unsigned int depth_multiplier, - const DataType data_type, const DataType bias_data_type, const QuantizationInfo quantization_info, const DataLayout data_layout) + const DataType data_type, const DataType bias_data_type, const QuantizationInfo quantization_info, const DataLayout data_layout, ActivationLayerInfo act_info) { if(data_layout == DataLayout::NHWC) { @@ -124,7 +125,7 @@ protected: // Create Depthwise Convolution configure function FunctionType dwc; - dwc.configure(&src, &weights, &biases, &dst, pad_stride_info, depth_multiplier, ActivationLayerInfo(), dilation); + dwc.configure(&src, &weights, &biases, &dst, pad_stride_info, depth_multiplier, act_info, dilation); ARM_COMPUTE_EXPECT(src.info()->is_resizable(), framework::LogLevel::ERRORS); ARM_COMPUTE_EXPECT(weights.info()->is_resizable(), framework::LogLevel::ERRORS); @@ -155,7 +156,7 @@ protected: SimpleTensor compute_reference(const TensorShape &in_shape, const TensorShape &weights_shape, const TensorShape &biases_shape, const TensorShape &out_shape, const PadStrideInfo &pad_stride_info, const Size2D &dilation, unsigned int depth_multiplier, - const DataType data_type, const DataType bias_data_type, const QuantizationInfo quantization_info) + const DataType data_type, const DataType bias_data_type, const QuantizationInfo quantization_info, ActivationLayerInfo act_info) { SimpleTensor src{ in_shape, data_type, 1, quantization_info }; SimpleTensor weights{ weights_shape, data_type, 1, quantization_info }; @@ -165,7 +166,8 @@ protected: fill(weights, 1); fill(biases, 2); - return reference::depthwise_convolution(src, weights, biases, out_shape, pad_stride_info, depth_multiplier, dilation); + SimpleTensor depth_out = reference::depthwise_convolution(src, weights, biases, out_shape, pad_stride_info, depth_multiplier, dilation); + return (act_info.enabled()) ? reference::activation_layer(depth_out, act_info) : depth_out; } TensorType _target{}; @@ -179,10 +181,11 @@ class DepthwiseConvolutionLayerValidationFixture : public DepthwiseConvolutionLa { public: template - void setup(TensorShape in_shape, Size2D kernel_size, PadStrideInfo pad_stride_info, Size2D dilation, unsigned int depth_multiplier, DataType data_type, DataLayout data_layout) + void setup(TensorShape in_shape, Size2D kernel_size, PadStrideInfo pad_stride_info, Size2D dilation, unsigned int depth_multiplier, DataType data_type, DataLayout data_layout, + ActivationLayerInfo act_info) { DepthwiseConvolutionLayerValidationGenericFixture::setup(in_shape, kernel_size, pad_stride_info, dilation, depth_multiplier, - data_type, QuantizationInfo(), data_layout); + data_type, QuantizationInfo(), data_layout, act_info); } }; @@ -192,10 +195,10 @@ class DepthwiseConvolutionLayerValidationQuantizedFixture : public DepthwiseConv public: template void setup(TensorShape in_shape, Size2D kernel_size, PadStrideInfo pad_stride_info, Size2D dilation, unsigned int depth_multiplier, DataType data_type, QuantizationInfo quantization_info, - DataLayout data_layout) + DataLayout data_layout, ActivationLayerInfo act_info) { DepthwiseConvolutionLayerValidationGenericFixture::setup(in_shape, kernel_size, pad_stride_info, dilation, depth_multiplier, - data_type, quantization_info, data_layout); + data_type, quantization_info, data_layout, act_info); } }; } // namespace validation -- cgit v1.2.1