diff options
author | Giorgio Arena <giorgio.arena@arm.com> | 2020-10-02 10:20:11 +0100 |
---|---|---|
committer | Giorgio Arena <giorgio.arena@arm.com> | 2020-10-07 14:28:17 +0000 |
commit | d304adbb1c6a2f66144c9cac1104f6e3f30d255a (patch) | |
tree | 325849f9280cfb0c92900794371d1c63d70a619c /src/core/CL/cl_kernels/activation_layer.cl | |
parent | 1e75adac392dd979bd1a838583ed196e311bc77a (diff) | |
download | ComputeLibrary-d304adbb1c6a2f66144c9cac1104f6e3f30d255a.tar.gz |
COMPMID-3703 Remove OpenCL padding: CLActivationLayerKernel + create utility macro
Change-Id: I73edadc7299247e7bc51ae37c00d3709023da44a
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4073
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/activation_layer.cl')
-rw-r--r-- | src/core/CL/cl_kernels/activation_layer.cl | 17 |
1 files changed, 9 insertions, 8 deletions
```diff
diff --git a/src/core/CL/cl_kernels/activation_layer.cl b/src/core/CL/cl_kernels/activation_layer.cl
index f846cb2764..499378c87f 100644
--- a/src/core/CL/cl_kernels/activation_layer.cl
+++ b/src/core/CL/cl_kernels/activation_layer.cl
@@ -1,5 +1,5 @@
 /*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
  *
  * SPDX-License-Identifier: MIT
  *
@@ -61,23 +61,24 @@ __kernel void activation_layer(
 #endif /* not IN_PLACE */
 )
 {
+    uint x_offs = max((int)(get_global_id(0) * VEC_SIZE * sizeof(DATA_TYPE) - (VEC_SIZE - VEC_SIZE_LEFTOVER) % VEC_SIZE * sizeof(DATA_TYPE)), 0);
+
     // Get pixels pointer
-    Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
+    __global uchar *input_addr = input_ptr + input_offset_first_element_in_bytes + x_offs + get_global_id(1) * input_stride_y + get_global_id(2) * input_stride_z;
 #ifdef IN_PLACE
-    Tensor3D output = input;
+    __global uchar *output_addr = input_addr;
 #else /* IN_PLACE */
-    Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
+    __global uchar *output_addr = output_ptr + output_offset_first_element_in_bytes + x_offs + get_global_id(1) * output_stride_y + get_global_id(2) * output_stride_z;
 #endif /* IN_PLACE */
 
     // Load data
-    TYPE data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr);
+    TYPE data0 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input_addr);
 
     // Perform activation
-    data = ACTIVATION(ACT, DATA_TYPE, data, A_VAL, B_VAL);
+    data0 = ACTIVATION(ACT, DATA_TYPE, data0, A_VAL, B_VAL);
 
     // Store result
-    VSTORE(VEC_SIZE)
-    (data, 0, (__global DATA_TYPE *)output.ptr);
+    STORE_VECTOR_SELECT(data, DATA_TYPE, output_addr, VEC_SIZE, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0)
 }
 
 #endif /* defined(ACT) */
```