aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r--src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp35
1 files changed, 35 insertions, 0 deletions
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
index 779cf25fdf..c78ad1a5b5 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
@@ -343,6 +343,41 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::run(const Window &window, cl::Com
add_1D_tensor_argument(idx, _biases, win_biases);
}
+ // Calculate the max_offset.
+ // max_offset is the byte offset of the first NOT valid value just past the last plane of a batch in the Z dimension (spatial dimension Y for NHWC), as marked in the diagram below
+ // |******************|
+ // | pad_top |
+ // |******************|
+ // | |
+ // | plane0 |
+ // | batch0 |
+ // |__________________|
+ // |******************| Batch 0
+ // | pad_bottom |
+ // | pad_top |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | batch0 |
+ // |__________________|-----> max_offset
+ // |******************|
+ // | pad_bottom |
+ // | pad_top |
+ // |******************|
+ // | |
+ // | plane0 |
+ // | batch1 |
+ // |__________________|
+ // |******************| Batch 1
+ // | pad_bottom |
+ // | pad_top |
+ // |******************|
+ // | |
+ // | plane1 |
+ // | batch1 |
+ // |__________________|
+ // | pad_bottom |
+ // |******************|
const int max_offset = _input->info()->strides_in_bytes().z() * _input->info()->dimension(2) - (_input->info()->padding().bottom + _input->info()->padding().top) *
_input->info()->strides_in_bytes().y();
_kernel.setArg(idx, max_offset);