diff options
Diffstat (limited to 'src/core/CL')
-rw-r--r-- | src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp | 35 |
1 file changed, 35 insertions, 0 deletions
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp index 779cf25fdf..c78ad1a5b5 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp @@ -343,6 +343,41 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::run(const Window &window, cl::Com add_1D_tensor_argument(idx, _biases, win_biases); } + // Calculate the max_offset. + // max_offset is the offset for the last NOT valid value in the Z dimension (spatial dimension Y for NHWC) + // |******************| + // | pad_top | + // |******************| + // | | + // | plane0 | + // | batch0 | + // |__________________| + // |******************| Batch 0 + // | pad_bottom | + // | pad_top | + // |******************| + // | | + // | plane1 | + // | batch0 | + // |__________________|-----> max_offset + // |******************| + // | pad_bottom | + // | pad_top | + // |******************| + // | | + // | plane0 | + // | batch1 | + // |__________________| + // |******************| Batch 1 + // | pad_bottom | + // | pad_top | + // |******************| + // | | + // | plane1 | + // | batch1 | + // |__________________| + // | pad_bottom | + // |******************| const int max_offset = _input->info()->strides_in_bytes().z() * _input->info()->dimension(2) - (_input->info()->padding().bottom + _input->info()->padding().top) * _input->info()->strides_in_bytes().y(); _kernel.setArg(idx, max_offset); |