From 4a626a7d52e9c4759bdc16b65401a53779dd975f Mon Sep 17 00:00:00 2001 From: Pablo Tello Date: Wed, 4 Apr 2018 10:01:14 +0100 Subject: COMPMID-801: NHWC support in CLIm2Col. And extended tests coverage adding kernel shapes 3x1, 1x5 and 7x7 Change-Id: Ia7c1d4da2368d5f5fbc1a41187f4ac1aca5f150f Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/127727 Tested-by: Jenkins Reviewed-by: Gian Marco Iodice --- tests/validation/reference/Im2Col.cpp | 17 +++++++++++++---- 1 file changed, 13 insertions(+), 4 deletions(-) (limited to 'tests/validation/reference') diff --git a/tests/validation/reference/Im2Col.cpp b/tests/validation/reference/Im2Col.cpp index 5685b60026..83ef8b40a5 100644 --- a/tests/validation/reference/Im2Col.cpp +++ b/tests/validation/reference/Im2Col.cpp @@ -55,11 +55,16 @@ void im2col_nchw(const SimpleTensor<T> &src, SimpleTensor<T> &dst, const Size2D const int pad_val = is_data_type_quantized_asymmetric(src.data_type()) ? src.quantization_info().offset : 0; int dst_idx = 0; + // dst[dst_idx++] will write out of bounds if kernel_height == kernel_width == 1 because lasty will be the bottom padding row + // and this is not present in the dst buffer + const int lasty = src_height + (kernel_height > 1 ? pad_y : 0) - kernel_height; + const int lastx = src_width + (kernel_width > 1 ? pad_x : 0) - kernel_width; + for(int b = 0; b < batches; ++b) { - for(int y = -pad_y; y <= (src_height + pad_y - kernel_height); y += stride_y) + for(int y = -pad_y; y <= lasty; y += stride_y) { - for(int x = -pad_x; x <= (src_width + pad_x - kernel_width); x += stride_x) + for(int x = -pad_x; x <= lastx; x += stride_x) { for(int z = 0; z < src_depth; ++z) { @@ -97,11 +102,15 @@ void im2col_nhwc(const SimpleTensor<T> &src, SimpleTensor<T> &dst, const Size2D const int batches = src.shape().total_size_upper(3); const int pad_val = is_data_type_quantized_asymmetric(src.data_type()) ? src.quantization_info().offset : 0; int dst_idx = 0; + + const int lasty = src_height + (kernel_height > 1 ? 
pad_y : 0) - kernel_height; + const int lastx = src_width + (kernel_width > 1 ? pad_x : 0) - kernel_width; + for(int b = 0; b < batches; ++b) { - for(int y = -pad_y; y <= (src_height + pad_y - kernel_height); y += stride_y) + for(int y = -pad_y; y <= lasty; y += stride_y) { - for(int x = -pad_x; x <= (src_width + pad_x - kernel_width); x += stride_x) + for(int x = -pad_x; x <= lastx; x += stride_x) { for(int z = 0; z < src_depth; ++z) { -- cgit v1.2.1