aboutsummaryrefslogtreecommitdiff
path: root/src/core/NEON/kernels/NEIm2ColKernel.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/NEON/kernels/NEIm2ColKernel.cpp')
-rw-r--r--src/core/NEON/kernels/NEIm2ColKernel.cpp21
1 files changed, 11 insertions, 10 deletions
diff --git a/src/core/NEON/kernels/NEIm2ColKernel.cpp b/src/core/NEON/kernels/NEIm2ColKernel.cpp
index 874259bbb7..0641d6cfa3 100644
--- a/src/core/NEON/kernels/NEIm2ColKernel.cpp
+++ b/src/core/NEON/kernels/NEIm2ColKernel.cpp
@@ -205,15 +205,16 @@ inline void linearize_volume_nhwc(const uint8_t *const in_ptr,
int dilation_x,
int dilation_y)
{
- const int end_x = start_x + kernel_width * dilation_x;
- const int end_y = start_y + kernel_height * dilation_y;
- const int pad_quant = kernel_width * input_c;
- if((start_y >= 0) && (end_y < input_h) && (start_x >= 0) && (end_x < input_w) && (dilation_x == 1))
+ const int end_x = start_x + kernel_width * dilation_x;
+ const int end_y = start_y + kernel_height * dilation_y;
+ const int pad_quant = kernel_width * input_c;
+ const int element_size = static_cast<int>(sizeof(T));
+ if((start_y >= 0) && (end_y < input_h) && (start_x >= 0) && (end_x < input_w) && (dilation_x == 1) && (input_stride_y == input_c * element_size))
{
for(int y = start_y; y < end_y; y += dilation_y)
{
//optimized for no dilation and no boundary pixels
- memcpy(out_ptr, reinterpret_cast<const T *>(in_ptr + (y * input_stride_z + start_x * input_stride_y)), input_c * kernel_width * sizeof(T));
+ memcpy(out_ptr, reinterpret_cast<const T *>(in_ptr + (y * input_stride_z + start_x * input_stride_y)), input_c * kernel_width * element_size);
out_ptr += input_c * kernel_width;
}
}
@@ -223,21 +224,21 @@ inline void linearize_volume_nhwc(const uint8_t *const in_ptr,
{
if(y < 0 || y >= input_h)
{
- memset(out_ptr, pad_value, pad_quant * sizeof(T));
+ memset(out_ptr, pad_value, pad_quant * element_size);
out_ptr += pad_quant;
}
- else if(dilation_x > 1 || start_x < 0 || end_x >= input_w)
+ else if(dilation_x > 1 || start_x < 0 || end_x >= input_w || input_stride_y != input_c * element_size)
{
for(int x = start_x; x < end_x; x += dilation_x)
{
if(x < 0 || x >= input_w)
{
- memset(out_ptr, pad_value, input_c * sizeof(T));
+ memset(out_ptr, pad_value, input_c * element_size);
out_ptr += input_c;
}
else
{
- memcpy(out_ptr, reinterpret_cast<const T *>(in_ptr + (y * input_stride_z + x * input_stride_y)), input_c * sizeof(T));
+ memcpy(out_ptr, reinterpret_cast<const T *>(in_ptr + (y * input_stride_z + x * input_stride_y)), input_c * element_size);
out_ptr += input_c;
}
}
@@ -245,7 +246,7 @@ inline void linearize_volume_nhwc(const uint8_t *const in_ptr,
else
{
//optimized for no dilation and no boundary pixels
- memcpy(out_ptr, reinterpret_cast<const T *>(in_ptr + (y * input_stride_z + start_x * input_stride_y)), input_c * kernel_width * sizeof(T));
+ memcpy(out_ptr, reinterpret_cast<const T *>(in_ptr + (y * input_stride_z + start_x * input_stride_y)), input_c * kernel_width * element_size);
out_ptr += input_c * kernel_width;
}
}