aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
author Georgios Pinitas <georgios.pinitas@arm.com> 2018-05-18 17:11:16 +0100
committer Anthony Barbier <anthony.barbier@arm.com> 2018-11-02 16:52:54 +0000
commit bc06f99c03ad73cf6033b5c9a848672e3e1c368d (patch)
tree ca1c15e7b01c76323957400975c190aab6e47b1f
parent 65f9982e99a6d9bea8084b7e15bc6c72ff535d8b (diff)
download ComputeLibrary-bc06f99c03ad73cf6033b5c9a848672e3e1c368d.tar.gz
COMPMID-959: Fix deconvolution window.
Change-Id: I791855edf6f821381ecb8ff0652fb14a5810d9d7
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/131912
Tested-by: Jenkins <bsgcomp@arm.com>
Reviewed-by: Pablo Tello <pablo.tello@arm.com>
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
-rw-r--r-- src/core/CL/cl_kernels/deconvolution_layer.cl 12
-rw-r--r-- src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp 12
2 files changed, 15 insertions, 9 deletions
diff --git a/src/core/CL/cl_kernels/deconvolution_layer.cl b/src/core/CL/cl_kernels/deconvolution_layer.cl
index 794f4aa950..e15482c1ba 100644
--- a/src/core/CL/cl_kernels/deconvolution_layer.cl
+++ b/src/core/CL/cl_kernels/deconvolution_layer.cl
@@ -30,20 +30,24 @@
* @param[in] src_step_x src_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] src_stride_y Stride of the source image in Y dimension (in bytes)
* @param[in] src_step_y src_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] src_stride_z Stride of the source tensor in Z dimension (in bytes)
+ * @param[in] src_step_z src_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] src_offset_first_element_in_bytes The offset of the first element in the source image
* @param[out] dst_ptr Pointer to the destination image. Supported data types: F16/F32
* @param[in] dst_stride_x Stride of the destination image in X dimension (in bytes)
* @param[in] dst_step_x dst_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] dst_stride_y Stride of the destination image in Y dimension (in bytes)
* @param[in] dst_step_y dst_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] dst_stride_z Stride of the destination tensor in Z dimension (in bytes)
+ * @param[in] dst_step_z dst_stride_z * number of elements along Z processed per workitem(in bytes)
* @param[in] dst_offset_first_element_in_bytes The offset of the first element in the destination image
*/
__kernel void deconvolution_upsample(
- IMAGE_DECLARATION(src),
- IMAGE_DECLARATION(dst))
+ TENSOR3D_DECLARATION(src),
+ TENSOR3D_DECLARATION(dst))
{
- Image src = CONVERT_TO_IMAGE_STRUCT(src);
- Image dst = CONVERT_TO_IMAGE_STRUCT(dst);
+ Tensor3D src = CONVERT_TO_TENSOR3D_STRUCT(src);
+ Tensor3D dst = CONVERT_TO_TENSOR3D_STRUCT(dst);
// Store result
*((__global DATA_TYPE *)dst.ptr) = *((__global DATA_TYPE *)src.ptr);
diff --git a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
index e7cdf8c607..1feac7d815 100644
--- a/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
+++ b/src/core/CL/kernels/CLDeconvolutionLayerUpsampleKernel.cpp
@@ -101,18 +101,20 @@ void CLDeconvolutionLayerUpsampleKernel::run(const Window &window, cl::CommandQu
const int out_end_y = _output->info()->dimension(1) - _info.pad().second + _info.stride().second - 1;
const int out_step_y = _info.stride().second;
- Window slice_out = window.first_slice_window_2D();
+ Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ);
+
+ Window slice_out = collapsed.first_slice_window_3D();
slice_out.set(Window::DimX, Window::Dimension(out_start_x, out_end_x, out_step_x));
slice_out.set(Window::DimY, Window::Dimension(out_start_y, out_end_y, out_step_y));
- Window slice_in = window.first_slice_window_2D();
+ Window slice_in = collapsed.first_slice_window_3D();
do
{
unsigned int idx = 0;
- add_2D_tensor_argument(idx, _input, slice_in);
- add_2D_tensor_argument(idx, _output, slice_out);
+ add_3D_tensor_argument(idx, _input, slice_in);
+ add_3D_tensor_argument(idx, _output, slice_out);
enqueue(queue, *this, slice_out);
}
- while(window.slide_window_slice_2D(slice_in) && window.slide_window_slice_2D(slice_out));
+ while(collapsed.slide_window_slice_3D(slice_in) && collapsed.slide_window_slice_3D(slice_out));
}