aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorGian Marco Iodice <gianmarco.iodice@arm.com>2019-08-13 14:56:50 +0100
committerGian Marco Iodice <gianmarco.iodice@arm.com>2019-08-13 15:25:54 +0000
commit3ae323f941f55cf282caa75dffa5150dd975b321 (patch)
tree6c2110f7587e96b094157a0be3de491bc6103005
parent670c04ac4e99ee4b9eeb231e9d980c927bddfaa0 (diff)
downloadComputeLibrary-3ae323f941f55cf282caa75dffa5150dd975b321.tar.gz
COMPMID-2580: Fix out of bound read in Depthwise Convolution layer (OpenCL)
Change-Id: I00e39ed21cc30034aa10ac58b64d533e833eafc8 Signed-off-by: Gian Marco Iodice <gianmarco.iodice@arm.com> Reviewed-on: https://review.mlplatform.org/c/1727 Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--src/core/CL/cl_kernels/depthwise_convolution.cl41
-rw-r--r--src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp2
2 files changed, 25 insertions, 18 deletions
diff --git a/src/core/CL/cl_kernels/depthwise_convolution.cl b/src/core/CL/cl_kernels/depthwise_convolution.cl
index c55a3d91c2..fb4a0fc157 100644
--- a/src/core/CL/cl_kernels/depthwise_convolution.cl
+++ b/src/core/CL/cl_kernels/depthwise_convolution.cl
@@ -280,16 +280,17 @@ inline float2 convolution1x3_stride_3(__global const uchar *left_pixel,
* @return a float2 containing 2 convoluted values.
*/
inline float2 convolution3x3(
- Image *src,
+ __global const uchar *src,
+ unsigned int src_stride_y,
const float mat0, const float mat1, const float mat2,
const float mat3, const float mat4, const float mat5,
const float mat6, const float mat7, const float mat8)
{
float2 pixels;
- pixels = convolution1x3(offset(src, 0, 0), mat0, mat1, mat2);
- pixels += convolution1x3(offset(src, 0, DILATION_Y), mat3, mat4, mat5);
- pixels += convolution1x3(offset(src, 0, DILATION_Y * 2), mat6, mat7, mat8);
+ pixels = convolution1x3((src + 0 * DILATION_Y * src_stride_y), mat0, mat1, mat2);
+ pixels += convolution1x3((src + 1 * DILATION_Y * src_stride_y), mat3, mat4, mat5);
+ pixels += convolution1x3((src + 2 * DILATION_Y * src_stride_y), mat6, mat7, mat8);
return pixels;
}
@@ -341,27 +342,33 @@ __kernel void depthwise_convolution_3x3(
Image src = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(src);
Image dst = CONVERT_TENSOR3D_TO_IMAGE_STRUCT(dst);
Tensor3D weights = CONVERT_TO_TENSOR3D_STRUCT_NO_STEP(weights);
-#if defined(HAS_BIAS)
- Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases);
-#endif //defined(HAS_BIAS)
+
+ float2 pixels = 0.0f;
// Extract channel and linearized batch indices
const int channel = get_global_id(2) % DST_CHANNELS;
const int batch = get_global_id(2) / DST_CHANNELS;
// Load relevant input and weights data (Accounts depth multiplier when indexing input, OFM = IFM * DEPTH_MULTIPLIER)
- src.ptr -= batch * (DST_CHANNELS / DEPTH_MULTIPLIER) * (DEPTH_MULTIPLIER - 1) * src_step_z + (channel - (channel / DEPTH_MULTIPLIER)) * src_step_z;
+
__global uchar *weights_addr = weights.ptr + get_global_id(0) * weights_step_x + get_global_id(1) * weights_step_y + channel * weights_step_z;
- uchar3 offset = (uchar3)(0, 1, 2) * (uchar3)weights_stride_y;
- float3 weights_values0 = vload3(0, (__global float *)(weights_addr + offset.s0));
- float3 weights_values1 = vload3(0, (__global float *)(weights_addr + offset.s1));
- float3 weights_values2 = vload3(0, (__global float *)(weights_addr + offset.s2));
+ __global uchar *src_addr = src.ptr - batch * (DST_CHANNELS / DEPTH_MULTIPLIER) * (DEPTH_MULTIPLIER - 1) * src_step_z - (channel - (channel / DEPTH_MULTIPLIER)) * src_step_z;
- float2 pixels = convolution3x3(&src, weights_values0.s0, weights_values0.s1, weights_values0.s2,
- weights_values1.s0, weights_values1.s1, weights_values1.s2,
- weights_values2.s0, weights_values2.s1, weights_values2.s2);
+ // Load the weights
+ float3 weights_values0 = vload3(0, (__global float *)(weights_addr + 0 * weights_stride_y));
+ float3 weights_values1 = vload3(0, (__global float *)(weights_addr + 1 * weights_stride_y));
+ float3 weights_values2 = vload3(0, (__global float *)(weights_addr + 2 * weights_stride_y));
+
+ pixels = convolution3x3(src_addr, src_stride_y,
+ weights_values0.s0, weights_values0.s1, weights_values0.s2,
+ weights_values1.s0, weights_values1.s1, weights_values1.s2,
+ weights_values2.s0, weights_values2.s1, weights_values2.s2);
#if defined(HAS_BIAS)
- pixels += (float2)(*((__global float *)(biases.ptr + channel * biases_stride_x)));
+ Vector biases = CONVERT_TO_VECTOR_STRUCT_NO_STEP(biases);
+
+ float bias = *((__global float *)(vector_offset(&biases, channel)));
+
+ pixels += (float2)bias;
#endif //defined(HAS_BIAS)
vstore2(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, pixels, A_VAL, B_VAL), 0, (__global float *)dst.ptr);
@@ -1862,4 +1869,4 @@ __kernel void depthwise_convolution_3x3_nhwc_stride1(
}
#endif // defined(NUM_ROWS_PROCESSED) && defined(NUM_PLANES_PROCESSED)
-#endif // defined(VEC_SIZE) && defined(SRC_DIM_2) && defined(CONV_PAD_TOP) && defined(CONV_PAD_LEFT) && defined(DATA_TYPE)
+#endif // defined(VEC_SIZE) && defined(SRC_DIM_2) && defined(CONV_PAD_TOP) && defined(CONV_PAD_LEFT) && defined(DATA_TYPE) \ No newline at end of file
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
index fe796e96b8..c91dcece80 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NCHWKernel.cpp
@@ -179,7 +179,7 @@ std::pair<Status, Window> validate_and_configure_window(ITensorInfo *input, ITen
num_elems_written_per_iteration_x = 8 / data_size_from_type(input->data_type());
num_elems_written_per_iteration_y = (is_qasymm && conv_stride_y == 1 && dilation.y() == 1) ? 2 : 1;
- num_elems_read_per_iteration_x = 3 + (num_elems_written_per_iteration_x - 1) * conv_stride_x;
+ num_elems_read_per_iteration_x = 3 + (num_elems_written_per_iteration_x - 1) * conv_stride_x + (conv_stride_x > 1 ? 1 : 0);
num_elems_read_per_iteration_y = num_elems_written_per_iteration_y + 2;
}
num_elems_read_per_iteration_x += (num_elems_read_per_iteration_x - 1) * (dilation.x() - 1);