From 3ae323f941f55cf282caa75dffa5150dd975b321 Mon Sep 17 00:00:00 2001 From: Gian Marco Iodice Date: Tue, 13 Aug 2019 14:56:50 +0100 Subject: COMPMID-2580: Fix out of bound read in Depthwise Convolution layer (OpenCL) Change-Id: I00e39ed21cc30034aa10ac58b64d533e833eafc8 Signed-off-by: Gian Marco Iodice Reviewed-on: https://review.mlplatform.org/c/1727 Reviewed-by: Michele Di Giorgio Tested-by: Arm Jenkins --- src/core/CL/cl_kernels/depthwise_convolution.cl | 41 +++++++++++++--------- .../CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp | 2 +- 2 files changed, 25 insertions(+), 18 deletions(-) diff --git a/src/core/CL/cl_kernels/depthwise_convolution.cl b/src/core/CL/cl_kernels/depthwise_convolution.cl index c55a3d91c2..fb4a0fc157 100644 --- a/src/core/CL/cl_kernels/depthwise_convolution.cl +++ b/src/core/CL/cl_kernels/depthwise_convolution.cl @@ -280,16 +280,17 @@ inline float2 convolution1x3_stride_3(__global const uchar *left_pixel, * @return a float2 containing 2 convoluted values. */ inline float2 convolution3x3( - Image *src, + __global const uchar *src, + unsigned int src_stride_y, const float mat0, const float mat1, const float mat2, const float mat3, const float mat4, const float mat5, const float mat6, const float mat7, const float mat8) { float2 pixels; - pixels = convolution1x3(offset(src, 0, 0), mat0, mat1, mat2); - pixels += convolution1x3(offset(src, 0, DILATION_Y), mat3, mat4, mat5); - pixels += convolution1x3(offset(src, 0, DILATION_Y * 2), mat6, mat7, mat8); + pixels = convolution1x3((src + 0 * DILATION_Y * src_stride_y), mat0, mat1, mat2); + pixels += convolution1x3((src + 1 * DILATION_Y * src_stride_y), mat3, mat4, mat5); + pixels += convolution1x3((src + 2 * DILATION_Y * src_stride_y), mat6, mat7, mat8); return pixels; } @@ -341,27 +342,33 @@ __kernel void depthwise_convolution_3x3( Image src = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(src); Image dst = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(dst); Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(weights); -#if defined(HAS_BIAS) - Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases); -#endif //defined(HAS_BIAS) + + float2 pixels = 0.0f; // Extract channel and linearized batch indices const int channel = get_global_id(2) % DST_CHANNELS; const int batch = get_global_id(2) / DST_CHANNELS; // Load relevant input and weights data (Accounts depth multiplier when indexing input, OFM = IFM * DEPTH_MULTIPLIER) - src.ptr -= batch * (DST_CHANNELS / DEPTH_MULTIPLIER) * (DEPTH_MULTIPLIER - 1) * src_step_z + (channel - (channel / DEPTH_MULTIPLIER)) * src_step_z; + __global uchar *weights_addr = weights.ptr + get_global_id(0) * weights_step_x + get_global_id(1) * weights_step_y + channel * weights_step_z; - uchar3 offset = (uchar3)(0, 1, 2) * (uchar3)weights_stride_y; - float3 weights_values0 = vload3(0, (__global float *)(weights_addr + offset.s0)); - float3 weights_values1 = vload3(0, (__global float *)(weights_addr + offset.s1)); - float3 weights_values2 = vload3(0, (__global float *)(weights_addr + offset.s2)); + __global uchar *src_addr = src.ptr - batch * (DST_CHANNELS / DEPTH_MULTIPLIER) * (DEPTH_MULTIPLIER - 1) * src_step_z - (channel - (channel / DEPTH_MULTIPLIER)) * src_step_z; - float2 pixels = convolution3x3(&src, weights_values0.s0, weights_values0.s1, weights_values0.s2, - weights_values1.s0, weights_values1.s1, weights_values1.s2, - weights_values2.s0, weights_values2.s1, weights_values2.s2); + // Load the weights + float3 weights_values0 = vload3(0, (__global float *)(weights_addr + 0 * weights_stride_y)); + float3 weights_values1 = vload3(0, (__global float *)(weights_addr + 1 * weights_stride_y)); + float3 weights_values2 = vload3(0, (__global float *)(weights_addr + 2 * weights_stride_y)); + + pixels = convolution3x3(src_addr, src_stride_y, + weights_values0.s0, weights_values0.s1, weights_values0.s2, + weights_values1.s0, weights_values1.s1, weights_values1.s2, + weights_values2.s0, weights_values2.s1, weights_values2.s2); #if defined(HAS_BIAS) - pixels += (float2)(*((__global float *)(biases.ptr + channel * biases_stride_x))); + Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases); + + float bias = *((__global float *)(vector_offset(&biases, channel))); + + pixels += (float2)bias; #endif //defined(HAS_BIAS) vstore2(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, pixels, A_VAL, B_VAL), 0, (__global float *)dst.ptr); @@ -1862,4 +1869,4 @@ __kernel void depthwise_convolution_3x3_nhwc_stride1( } #endif // defined(NUM_ROWS_PROCESSED) && defined(NUM_PLANES_PROCESSED) -#endif // defined(VEC_SIZE) && defined(SRC_DIM_2) && defined(CONV_PAD_TOP) && defined(CONV_PAD_LEFT) && defined(DATA_TYPE) +#endif // defined(VEC_SIZE) && defined(SRC_DIM_2) && defined(CONV_PAD_TOP) && defined(CONV_PAD_LEFT) && defined(DATA_TYPE) \ No newline at end of file diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp index fe796e96b8..c91dcece80 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp @@ -179,7 +179,7 @@ std::pair validate_and_configure_window(ITensorInfo *input, ITen num_elems_written_per_iteration_x = 8 / data_size_from_type(input->data_type()); num_elems_written_per_iteration_y = (is_qasymm && conv_stride_y == 1 && dilation.y() == 1) ? 2 : 1; - num_elems_read_per_iteration_x = 3 + (num_elems_written_per_iteration_x - 1) * conv_stride_x; + num_elems_read_per_iteration_x = 3 + (num_elems_written_per_iteration_x - 1) * conv_stride_x + (conv_stride_x > 1 ? 1 : 0); num_elems_read_per_iteration_y = num_elems_written_per_iteration_y + 2; } num_elems_read_per_iteration_x += (num_elems_read_per_iteration_x - 1) * (dilation.x() - 1); -- cgit v1.2.1