aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/cl_kernels/activation_layer.cl
diff options
context:
space:
mode:
Diffstat (limited to 'src/core/CL/cl_kernels/activation_layer.cl')
-rw-r--r-- src/core/CL/cl_kernels/activation_layer.cl | 17
1 files changed, 9 insertions, 8 deletions
diff --git a/src/core/CL/cl_kernels/activation_layer.cl b/src/core/CL/cl_kernels/activation_layer.cl
index f846cb2764..499378c87f 100644
--- a/src/core/CL/cl_kernels/activation_layer.cl
+++ b/src/core/CL/cl_kernels/activation_layer.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2019 Arm Limited.
+ * Copyright (c) 2016-2020 Arm Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -61,23 +61,24 @@ __kernel void activation_layer(
#endif /* not IN_PLACE */
)
{
+ uint x_offs = max((int)(get_global_id(0) * VEC_SIZE * sizeof(DATA_TYPE) - (VEC_SIZE - VEC_SIZE_LEFTOVER) % VEC_SIZE * sizeof(DATA_TYPE)), 0);
+
// Get pixels pointer
- Tensor3D input = CONVERT_TO_TENSOR3D_STRUCT(input);
+ __global uchar *input_addr = input_ptr + input_offset_first_element_in_bytes + x_offs + get_global_id(1) * input_stride_y + get_global_id(2) * input_stride_z;
#ifdef IN_PLACE
- Tensor3D output = input;
+ __global uchar *output_addr = input_addr;
#else /* IN_PLACE */
- Tensor3D output = CONVERT_TO_TENSOR3D_STRUCT(output);
+ __global uchar *output_addr = output_ptr + output_offset_first_element_in_bytes + x_offs + get_global_id(1) * output_stride_y + get_global_id(2) * output_stride_z;
#endif /* IN_PLACE */
// Load data
- TYPE data = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input.ptr);
+ TYPE data0 = VLOAD(VEC_SIZE)(0, (__global DATA_TYPE *)input_addr);
// Perform activation
- data = ACTIVATION(ACT, DATA_TYPE, data, A_VAL, B_VAL);
+ data0 = ACTIVATION(ACT, DATA_TYPE, data0, A_VAL, B_VAL);
// Store result
- VSTORE(VEC_SIZE)
- (data, 0, (__global DATA_TYPE *)output.ptr);
+ STORE_VECTOR_SELECT(data, DATA_TYPE, output_addr, VEC_SIZE, VEC_SIZE_LEFTOVER, VEC_SIZE_LEFTOVER != 0 && get_global_id(0) == 0)
}
#endif /* defined(ACT) */