From 96aa6b136cbe38ff5ba924c0109d35ba85546a8d Mon Sep 17 00:00:00 2001
From: Gian Marco Iodice
Date: Thu, 27 Jun 2019 16:55:51 +0100
Subject: COMPMID-2431: Add a comment to max_offset in CLDepthwiseConvolutionLayer3x3NHWCKernel

Change-Id: I4097cc3aec858beb0630254fe7a6790dfcb3d90f
Signed-off-by: Gian Marco Iodice
Reviewed-on: https://review.mlplatform.org/c/1431
Reviewed-by: Giuseppe Rossini
Comments-Addressed: Arm Jenkins
Tested-by: Arm Jenkins
---
 .../CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp | 35 ++++++++++++++++++++++
 1 file changed, 35 insertions(+)

(limited to 'src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp')

diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
index 779cf25fdf..c78ad1a5b5 100644
--- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
+++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp
@@ -343,6 +343,41 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::run(const Window &window, cl::Com
         add_1D_tensor_argument(idx, _biases, win_biases);
     }
 
+    // Calculate the max_offset.
+    // max_offset is the offset for the last NOT valid value in the Z dimension (spatial dimension Y for NHWC)
+    //  |******************|
+    //  |     pad_top      |
+    //  |******************|
+    //  |                  |
+    //  |      plane0      |
+    //  |      batch0      |
+    //  |__________________|
+    //  |******************|       Batch 0
+    //  |    pad_bottom    |
+    //  |     pad_top      |
+    //  |******************|
+    //  |                  |
+    //  |      plane1      |
+    //  |      batch0      |
+    //  |__________________|-----> max_offset
+    //  |******************|
+    //  |    pad_bottom    |
+    //  |     pad_top      |
+    //  |******************|
+    //  |                  |
+    //  |      plane0      |
+    //  |      batch1      |
+    //  |__________________|
+    //  |******************|       Batch 1
+    //  |    pad_bottom    |
+    //  |     pad_top      |
+    //  |******************|
+    //  |                  |
+    //  |      plane1      |
+    //  |      batch1      |
+    //  |__________________|
+    //  |    pad_bottom    |
+    //  |******************|
     const int max_offset = _input->info()->strides_in_bytes().z() * _input->info()->dimension(2) - (_input->info()->padding().bottom + _input->info()->padding().top) * _input->info()->strides_in_bytes().y();
     _kernel.setArg(idx, max_offset);
 
-- 
cgit v1.2.1