diff options
author | Jakub Sujak <jakub.sujak@arm.com> | 2023-10-05 10:20:59 +0100 |
---|---|---|
committer | Jakub Sujak <jakub.sujak@arm.com> | 2023-10-05 20:38:57 +0000 |
commit | a23b4686a091a7960a4b336d0fe53f15db4ae538 (patch) | |
tree | 1ad99168638177ccbf4f7c991ac539b5dd270eca /src/gpu/cl/kernels/ClTransposeKernel.h | |
parent | 3831111db26d791cade87fd2d7fe2663e2ceb4a6 (diff) | |
download | ComputeLibrary-a23b4686a091a7960a4b336d0fe53f15db4ae538.tar.gz |
Optimize CLTranspose operator
* Transpose higher-dimensional tensors (>2D) by collapsing the higher
dimensions into the third dimension, thus avoiding multiple dispatches
of the CL kernel
* Maximize tile size without register spilling
Resolves: COMPMID-6448
Change-Id: Iac094b8c428bdf319d9c28a8334cb55d58e2d14b
Signed-off-by: Jakub Sujak <jakub.sujak@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10443
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/gpu/cl/kernels/ClTransposeKernel.h')
-rw-r--r-- | src/gpu/cl/kernels/ClTransposeKernel.h | 10 |
1 file changed, 5 insertions, 5 deletions
diff --git a/src/gpu/cl/kernels/ClTransposeKernel.h b/src/gpu/cl/kernels/ClTransposeKernel.h index b30d6f0281..eaad38b20f 100644 --- a/src/gpu/cl/kernels/ClTransposeKernel.h +++ b/src/gpu/cl/kernels/ClTransposeKernel.h @@ -1,5 +1,5 @@ /* - * Copyright (c) 2017-2021 Arm Limited. + * Copyright (c) 2017-2021, 2023 Arm Limited. * * SPDX-License-Identifier: MIT * @@ -21,8 +21,8 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -#ifndef ARM_COMPUTE_CL_TRANSPOSE_KERNEL_H -#define ARM_COMPUTE_CL_TRANSPOSE_KERNEL_H +#ifndef ACL_SRC_GPU_CL_KERNELS_CLTRANSPOSEKERNEL_H +#define ACL_SRC_GPU_CL_KERNELS_CLTRANSPOSEKERNEL_H #include "src/core/common/Macros.h" #include "src/gpu/cl/ClCompileContext.h" @@ -34,7 +34,7 @@ namespace opencl { namespace kernels { -/** OpenCL kernel to transpose a 2D tensor. */ +/** OpenCL kernel to transpose a tensor. Only the first two dimensions (width, height) are transposed. */ class ClTransposeKernel : public IClKernel { public: @@ -61,4 +61,4 @@ public: } // namespace kernels } // namespace opencl } // namespace arm_compute -#endif /* ARM_COMPUTE_CL_TRANSPOSE_KERNEL_H */ +#endif // ACL_SRC_GPU_CL_KERNELS_CLTRANSPOSEKERNEL_H |