diff options
author | Giorgio Arena <giorgio.arena@arm.com> | 2020-10-26 15:04:08 +0000 |
---|---|---|
committer | Giorgio Arena <giorgio.arena@arm.com> | 2020-11-12 12:42:51 +0000 |
commit | 2d1a835b68eb27a800838fc2b563b12eddf2c19f (patch) | |
tree | 228dee073d37d2ec5b5dfbdb3d0e1e512ecb2d22 /src/core/CL/cl_kernels/depthwise_convolution.cl | |
parent | 00c7601b1f9c3bec1d3b1db844abb513b9012541 (diff) | |
download | ComputeLibrary-2d1a835b68eb27a800838fc2b563b12eddf2c19f.tar.gz |
COMPMID-3735 Remove OpenCL padding: CLSoftmaxLayerKernel
- Renamed SELECT_DATA_TYPE to SELECT_VEC_DATA_TYPE to reflect its usage with vectors. SELECT_DATA_TYPE(dt) will now return the primitive data type
- Changed the interface of VEC_OFFS and V_OFFS in order to receive the primitive data type as a parameter rather than its vector form
- Performed a general cleanup of the kernels, such as creating macro for sum and max reduces, remove reduntant macros, defines, variables, calculations, etc...
- Using VEC_SIZE and VEC_SIZE_LEFTOVER in every kernel in order to allow computation for smaller shapes without adding paddings
- Removed the actual padding from the kernel and adjusting its calculations accordingly. Added asserts for padding removal checks. Removed invalid Validate tests.
Change-Id: If5ccbd5d34e255d38c7f6bfe8740e2b80b28e264
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4277
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: SiCong Li <sicong.li@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/depthwise_convolution.cl')
-rw-r--r-- | src/core/CL/cl_kernels/depthwise_convolution.cl | 24 |
1 files changed, 12 insertions, 12 deletions
diff --git a/src/core/CL/cl_kernels/depthwise_convolution.cl b/src/core/CL/cl_kernels/depthwise_convolution.cl index 5aba2061b4..81fa01ae99 100644 --- a/src/core/CL/cl_kernels/depthwise_convolution.cl +++ b/src/core/CL/cl_kernels/depthwise_convolution.cl @@ -1476,17 +1476,17 @@ __kernel void dwc_MxN_native_fp_nhwc( #define VEC_FLOAT VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE) -#define FILL_ZERO_OUT_OF_BOUND_3(data_type, vec_size, basename, cond) \ - ({ \ - basename##0 = select(basename##0, (VEC_DATA_TYPE(data_type, vec_size))0, (SELECT_DATA_TYPE(data_type, vec_size))((cond).s0)); \ - basename##1 = select(basename##1, (VEC_DATA_TYPE(data_type, vec_size))0, (SELECT_DATA_TYPE(data_type, vec_size))((cond).s1)); \ - basename##2 = select(basename##2, (VEC_DATA_TYPE(data_type, vec_size))0, (SELECT_DATA_TYPE(data_type, vec_size))((cond).s2)); \ +#define FILL_ZERO_OUT_OF_BOUND_3(data_type, vec_size, basename, cond) \ + ({ \ + basename##0 = select(basename##0, (VEC_DATA_TYPE(data_type, vec_size))0, (SELECT_VEC_DATA_TYPE(data_type, vec_size))((cond).s0)); \ + basename##1 = select(basename##1, (VEC_DATA_TYPE(data_type, vec_size))0, (SELECT_VEC_DATA_TYPE(data_type, vec_size))((cond).s1)); \ + basename##2 = select(basename##2, (VEC_DATA_TYPE(data_type, vec_size))0, (SELECT_VEC_DATA_TYPE(data_type, vec_size))((cond).s2)); \ }) -#define FILL_ZERO_OUT_OF_BOUND_4(data_type, vec_size, basename, cond) \ - ({ \ - FILL_ZERO_OUT_OF_BOUND_3(data_type, vec_size, basename, cond); \ - basename##3 = select(basename##3, (VEC_DATA_TYPE(data_type, vec_size))0, (SELECT_DATA_TYPE(data_type, vec_size))((cond).s3)); \ +#define FILL_ZERO_OUT_OF_BOUND_4(data_type, vec_size, basename, cond) \ + ({ \ + FILL_ZERO_OUT_OF_BOUND_3(data_type, vec_size, basename, cond); \ + basename##3 = select(basename##3, (VEC_DATA_TYPE(data_type, vec_size))0, (SELECT_VEC_DATA_TYPE(data_type, vec_size))((cond).s3)); \ }) #if defined(CONV_STRIDE_X) && defined(CONV_STRIDE_Y) @@ -1728,8 +1728,8 @@ __kernel void depthwise_convolution_3x3_nhwc_stride1( __global uchar *src_addr = src_ptr + src_offset_first_element_in_bytes + x_offset; #endif /* defined(DST_DEPTH) */ - int4 src_coord_y = (int4)(y * NUM_ROWS_PROCESSED - CONV_PAD_LEFT) + V_OFFS4(int4); - int4 src_coord_z = (int4)(z * NUM_PLANES_PROCESSED - CONV_PAD_TOP) + V_OFFS4(int4); + int4 src_coord_y = (int4)(y * NUM_ROWS_PROCESSED - CONV_PAD_LEFT) + V_OFFS4(int); + int4 src_coord_z = (int4)(z * NUM_PLANES_PROCESSED - CONV_PAD_TOP) + V_OFFS4(int); int4 src_offset_y = clamp(src_coord_y, (int4)0, (int4)(SRC_DIM_1 - 1)); int4 src_offset_z = clamp(src_coord_z, (int4)0, (int4)(SRC_DIM_2 - 1)); @@ -1844,7 +1844,7 @@ __kernel void depthwise_convolution_3x3_nhwc_stride1( acc3 += bias_values; #endif // defined(HAS_BIAS) - int2 dst_offset_y = min((int2)(y * NUM_ROWS_PROCESSED) + V_OFFS2(int2), (int2)(DST_DIM_1 - 1)) * (int2)dst_stride_y; + int2 dst_offset_y = min((int2)(y * NUM_ROWS_PROCESSED) + V_OFFS2(int), (int2)(DST_DIM_1 - 1)) * (int2)dst_stride_y; int dst_coord_z = z * NUM_PLANES_PROCESSED; #if defined(DST_DEPTH) |