diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2019-06-07 11:52:01 +0100 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2019-06-18 17:16:59 +0000 |
commit | 75bde5e21cfbf5e699a3a89655d97fec7c0892e7 (patch) | |
tree | 42080467e4c449134d1b93990876ad49de59bf2c /src | |
parent | 51403b5b8ae92782e2ed467cae4c0783faa5a22b (diff) | |
download | ComputeLibrary-75bde5e21cfbf5e699a3a89655d97fec7c0892e7.tar.gz |
COMPMID-2336: Account for padding in NEIm2ColKernel for NHWC.
Change-Id: I494c4acc95cb431b1718ae62c1504522a115ba10
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/1312
Reviewed-by: Michele Di Giorgio <michele.digiorgio@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Giuseppe Rossini <giuseppe.rossini@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src')
-rw-r--r-- | src/core/NEON/kernels/NEIm2ColKernel.cpp | 21 |
1 file changed, 11 insertions, 10 deletions
diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp index 874259bbb7..0641d6cfa3 100644 --- a/src/core/NEON/kernels/NEIm2ColKernel.cpp +++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp @@ -205,15 +205,16 @@ inline void linearize_volume_nhwc(const uint8_t *const in_ptr, int dilation_x, int dilation_y) { - const int end_x = start_x + kernel_width * dilation_x; - const int end_y = start_y + kernel_height * dilation_y; - const int pad_quant = kernel_width * input_c; - if((start_y >= 0) && (end_y < input_h) && (start_x >= 0) && (end_x < input_w) && (dilation_x == 1)) + const int end_x = start_x + kernel_width * dilation_x; + const int end_y = start_y + kernel_height * dilation_y; + const int pad_quant = kernel_width * input_c; + const int element_size = static_cast<int>(sizeof(T)); + if((start_y >= 0) && (end_y < input_h) && (start_x >= 0) && (end_x < input_w) && (dilation_x == 1) && (input_stride_y == input_c * element_size)) { for(int y = start_y; y < end_y; y += dilation_y) { //optimized for no dilation and no boundary pixels - memcpy(out_ptr, reinterpret_cast<const T *>(in_ptr + (y * input_stride_z + start_x * input_stride_y)), input_c * kernel_width * sizeof(T)); + memcpy(out_ptr, reinterpret_cast<const T *>(in_ptr + (y * input_stride_z + start_x * input_stride_y)), input_c * kernel_width * element_size); out_ptr += input_c * kernel_width; } } @@ -223,21 +224,21 @@ inline void linearize_volume_nhwc(const uint8_t *const in_ptr, { if(y < 0 || y >= input_h) { - memset(out_ptr, pad_value, pad_quant * sizeof(T)); + memset(out_ptr, pad_value, pad_quant * element_size); out_ptr += pad_quant; } - else if(dilation_x > 1 || start_x < 0 || end_x >= input_w) + else if(dilation_x > 1 || start_x < 0 || end_x >= input_w || input_stride_y != input_c * element_size) { for(int x = start_x; x < end_x; x += dilation_x) { if(x < 0 || x >= input_w) { - memset(out_ptr, pad_value, input_c * sizeof(T)); + memset(out_ptr, pad_value, input_c * 
element_size); out_ptr += input_c; } else { - memcpy(out_ptr, reinterpret_cast<const T *>(in_ptr + (y * input_stride_z + x * input_stride_y)), input_c * sizeof(T)); + memcpy(out_ptr, reinterpret_cast<const T *>(in_ptr + (y * input_stride_z + x * input_stride_y)), input_c * element_size); out_ptr += input_c; } } @@ -245,7 +246,7 @@ inline void linearize_volume_nhwc(const uint8_t *const in_ptr, else { //optimized for no dilation and no boundary pixels - memcpy(out_ptr, reinterpret_cast<const T *>(in_ptr + (y * input_stride_z + start_x * input_stride_y)), input_c * kernel_width * sizeof(T)); + memcpy(out_ptr, reinterpret_cast<const T *>(in_ptr + (y * input_stride_z + start_x * input_stride_y)), input_c * kernel_width * element_size); out_ptr += input_c * kernel_width; } } |