diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2018-10-30 14:53:25 +0000 |
---|---|---|
committer | Georgios Pinitas <georgios.pinitas@arm.com> | 2018-11-06 10:11:21 +0000 |
commit | 3704464b68ddd22739b38354de1721a3db4267b5 (patch) | |
tree | 0a8be84710a25a0b08df3c67dc6a3988ed6bc34b /src/core/CL/kernels | |
parent | 29421bd048ab67f893860e487c1b2b43305529f8 (diff) | |
download | ComputeLibrary-3704464b68ddd22739b38354de1721a3db4267b5.tar.gz |
COMPMID-1703: Collapse the 4th dimension in CLDepthwiseConvolutionLayer3x3NHWCKernel
Change-Id: Ie274da79b15c03f86dfedc85bb721b3de34a0bb4
Diffstat (limited to 'src/core/CL/kernels')
-rw-r--r-- | src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp | 22 |
1 file changed, 14 insertions, 8 deletions
diff --git a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp index 93d96dad1b..d76f5495f1 100644 --- a/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp +++ b/src/core/CL/kernels/CLDepthwiseConvolutionLayer3x3NHWCKernel.cpp @@ -245,6 +245,8 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::configure(const ICLTensor *input, build_opts.add_option("-DCONV_STRIDE_X=" + support::cpp11::to_string(conv_stride_x)); build_opts.add_option("-DCONV_STRIDE_Y=" + support::cpp11::to_string(_conv_stride_y)); } + build_opts.add_option_if(_input->info()->tensor_shape().total_size_upper(3) > 1, + "-DDST_DEPTH=" + support::cpp11::to_string(static_cast<int>(std::ceil(_output->info()->dimension(2) / static_cast<float>(_num_planes_processed_per_iteration))))); // Create kernel std::string kernel_name = std::string("depthwise_convolution_3x3") + (is_qasymm ? std::string("_quantized") + ((is_dot8_supported @@ -291,8 +293,12 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::run(const Window &window, cl::Com ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(IKernel::window(), window); - Window win = window; - win.set(Window::DimZ, Window::Dimension(0, std::ceil(_output->info()->dimension(2) / static_cast<float>(_num_planes_processed_per_iteration)), 1)); + // Collapse window + Window window_collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); + const size_t total_batches = _input->info()->tensor_shape().total_size_upper(3); + + Window win = window_collapsed; + win.set(Window::DimZ, Window::Dimension(0, std::ceil(_output->info()->dimension(2) / static_cast<float>(_num_planes_processed_per_iteration)) * total_batches, 1)); // Create input window and adjust Window win_in = win; @@ -301,10 +307,10 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::run(const Window &window, cl::Com ARM_COMPUTE_ERROR_ON((win_in.y().step() < window.y().step()) 
|| (win_in.z().step() < window.z().step())); - Window slice_in = win_in.first_slice_window_3D(); - Window slice_out = win.first_slice_window_3D(); + Window slice_in = win_in.first_slice_window_4D(); + Window slice_out = win.first_slice_window_4D(); - unsigned int idx = 3 * num_arguments_per_3D_tensor(); + unsigned int idx = 2 * num_arguments_per_4D_tensor() + num_arguments_per_3D_tensor(); if(_biases != nullptr) { @@ -321,11 +327,11 @@ void CLDepthwiseConvolutionLayer3x3NHWCKernel::run(const Window &window, cl::Com do { unsigned int idx = 0; - add_3D_tensor_argument(idx, _input, slice_in); - add_3D_tensor_argument(idx, _output, slice_out); + add_4D_tensor_argument(idx, _input, slice_in); + add_4D_tensor_argument(idx, _output, slice_out); add_3D_tensor_argument(idx, _weights, slice_out); enqueue(queue, *this, slice_out, lws_hint()); } - while(window.slide_window_slice_3D(slice_out) && win_in.slide_window_slice_3D(slice_in)); + while(win.slide_window_slice_4D(slice_out) && win_in.slide_window_slice_4D(slice_in)); } |