diff options
Diffstat (limited to 'src/core/NEON/kernels/arm_conv/depthwise/premultiply.cpp')
-rw-r--r-- | src/core/NEON/kernels/arm_conv/depthwise/premultiply.cpp | 16 |
1 files changed, 15 insertions, 1 deletions
diff --git a/src/core/NEON/kernels/arm_conv/depthwise/premultiply.cpp b/src/core/NEON/kernels/arm_conv/depthwise/premultiply.cpp index ad4c821cfb..8a49c775d3 100644 --- a/src/core/NEON/kernels/arm_conv/depthwise/premultiply.cpp +++ b/src/core/NEON/kernels/arm_conv/depthwise/premultiply.cpp @@ -45,7 +45,9 @@ void do_premultiply_float_6(const float *in_ptr, { const float *ip = ip2; float *op = op2; - for(unsigned int c = 0; c < input_channels; c += BLOCK_SIZE) + + unsigned int num_blocks = input_channels / BLOCK_SIZE; + for(unsigned int c = 0; c < num_blocks; c++) { float vals[BLOCK_SIZE]; for(unsigned int v = 0; v < BLOCK_SIZE; v++) @@ -63,6 +65,18 @@ void do_premultiply_float_6(const float *in_ptr, op += CHANNEL_MULTIPLIER; } } + + unsigned int rem = input_channels - num_blocks * BLOCK_SIZE; + for(unsigned int c = 0; c < rem; c++) + { + float val = ip[c]; + for(unsigned int r = 0; r < CHANNEL_MULTIPLIER; r++) + { + op[r] = val; + } + op += CHANNEL_MULTIPLIER; + } + ip2 += ld_col; op2 += out_ld_col; } |