diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2018-01-03 12:29:22 +0000 |
---|---|---|
committer | Anthony Barbier <anthony.barbier@arm.com> | 2018-11-02 16:42:33 +0000 |
commit | 1d08a310b7316f2b731e60ac36dc68989d15b546 (patch) | |
tree | 2fe747eb22a5a094bbcef8f7519eef9d4b6172db /src/core/CL/kernels | |
parent | 2c350181118ec9eca864432c5bd78a0cfc3ebc32 (diff) | |
download | ComputeLibrary-1d08a310b7316f2b731e60ac36dc68989d15b546.tar.gz |
COMPMID-765: Collapse execution window in CL kernels.
Updated the following kernels to collapse their execution window and reduce
the number of kernel enqueues:
-CLArithmeticAddition
-CLArithmeticSubtraction
-CLPixelWiseMultiplication
Change-Id: I13d503515a20fa9be1401ead1e27e9bbc6627975
Reviewed-on: https://eu-gerrit-1.euhpc.arm.com/114878
Reviewed-by: Anthony Barbier <anthony.barbier@arm.com>
Tested-by: Anthony Barbier <anthony.barbier@arm.com>
Diffstat (limited to 'src/core/CL/kernels')
-rw-r--r-- | src/core/CL/kernels/CLArithmeticAdditionKernel.cpp | 12 | ||||
-rw-r--r-- | src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp | 11 | ||||
-rw-r--r-- | src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp | 5 |
3 files changed, 16 insertions, 12 deletions
diff --git a/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp b/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp index 2789573293..75701ee011 100644 --- a/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp +++ b/src/core/CL/kernels/CLArithmeticAdditionKernel.cpp @@ -154,14 +154,16 @@ void CLArithmeticAdditionKernel::run(const Window &window, cl::CommandQueue &que ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - Window slice = window.first_slice_window_2D(); + Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); + Window slice = collapsed.first_slice_window_3D(); + do { unsigned int idx = 0; - add_2D_tensor_argument(idx, _input1, slice); - add_2D_tensor_argument(idx, _input2, slice); - add_2D_tensor_argument(idx, _output, slice); + add_3D_tensor_argument(idx, _input1, slice); + add_3D_tensor_argument(idx, _input2, slice); + add_3D_tensor_argument(idx, _output, slice); enqueue(queue, *this, slice); } - while(window.slide_window_slice_2D(slice)); + while(collapsed.slide_window_slice_3D(slice)); } diff --git a/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp b/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp index cc2ef1f023..8308aa0767 100644 --- a/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp +++ b/src/core/CL/kernels/CLArithmeticSubtractionKernel.cpp @@ -146,15 +146,16 @@ void CLArithmeticSubtractionKernel::run(const Window &window, cl::CommandQueue & ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - Window slice = window.first_slice_window_2D(); + Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); + Window slice = collapsed.first_slice_window_3D(); do { unsigned int idx = 0; - add_2D_tensor_argument(idx, _input1, slice); - add_2D_tensor_argument(idx, _input2, slice); - add_2D_tensor_argument(idx, _output, slice); + add_3D_tensor_argument(idx, 
_input1, slice); + add_3D_tensor_argument(idx, _input2, slice); + add_3D_tensor_argument(idx, _output, slice); enqueue(queue, *this, slice); } - while(window.slide_window_slice_2D(slice)); + while(collapsed.slide_window_slice_3D(slice)); } diff --git a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp index fd5e5d5862..6dba9c0f95 100644 --- a/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp +++ b/src/core/CL/kernels/CLPixelWiseMultiplicationKernel.cpp @@ -227,7 +227,8 @@ void CLPixelWiseMultiplicationKernel::run(const Window &window, cl::CommandQueue ARM_COMPUTE_ERROR_ON_UNCONFIGURED_KERNEL(this); ARM_COMPUTE_ERROR_ON_INVALID_SUBWINDOW(ICLKernel::window(), window); - Window slice = window.first_slice_window_3D(); + Window collapsed = window.collapse_if_possible(ICLKernel::window(), Window::DimZ); + Window slice = collapsed.first_slice_window_3D(); do { @@ -237,5 +238,5 @@ void CLPixelWiseMultiplicationKernel::run(const Window &window, cl::CommandQueue add_3D_tensor_argument(idx, _output, slice); enqueue(queue, *this, slice); } - while(window.slide_window_slice_3D(slice)); + while(collapsed.slide_window_slice_3D(slice)); } |