diff options
author | Giorgio Arena <giorgio.arena@arm.com> | 2020-12-10 16:49:39 +0000 |
---|---|---|
committer | Giorgio Arena <giorgio.arena@arm.com> | 2020-12-14 13:58:17 +0000 |
commit | ea7de7babc319e2fa31c5e1c986e48d6c5370689 (patch) | |
tree | 2303791668c67eda76dfb14d07b912af1cb54a17 /src/core/CL/cl_kernels/pixelwise_mul_float.cl | |
parent | ec241b48ea7481e797285788fd68e5e1d42382bb (diff) | |
download | ComputeLibrary-ea7de7babc319e2fa31c5e1c986e48d6c5370689.tar.gz |
Enable FFT for FP16
Resolves: COMPMID-4051
Change-Id: I0c0bf97212dd281c19d5081e6247e7dc0c23cd6b
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4687
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/pixelwise_mul_float.cl')
-rw-r--r-- | src/core/CL/cl_kernels/pixelwise_mul_float.cl | 19 |
1 file changed, 13 insertions, 6 deletions
diff --git a/src/core/CL/cl_kernels/pixelwise_mul_float.cl b/src/core/CL/cl_kernels/pixelwise_mul_float.cl
index 4fa1551b54..845e1c9860 100644
--- a/src/core/CL/cl_kernels/pixelwise_mul_float.cl
+++ b/src/core/CL/cl_kernels/pixelwise_mul_float.cl
@@ -105,9 +105,11 @@ __kernel void pixelwise_mul_float(
 }
 #endif /* defined(DATA_TYPE_IN1) && defined(DATA_TYPE_IN2) && defined(ACC_DATA_TYPE) && defined(DATA_TYPE_OUT) */
 
+#if defined(DATA_TYPE)
+
 /** Performs a pixelwise multiplication of complex float values
  *
- * @param[in] in1_ptr Pointer to the source image. Supported data types: F32
+ * @param[in] in1_ptr Pointer to the source image. Supported data types: F16/F32
  * @param[in] in1_stride_x Stride of the source image in X dimension (in bytes)
  * @param[in] in1_step_x in1_stride_x * number of elements along X processed per workitem(in bytes)
  * @param[in] in1_stride_y Stride of the source image in Y dimension (in bytes)
@@ -143,16 +145,21 @@ __kernel void pixelwise_mul_complex(
     Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out);
 
     // Load data
-    float2 vin1 = vload2(0, (__global float *)in1.ptr);
-    float2 vin2 = vload2(0, (__global float *)in2.ptr);
+    VEC_DATA_TYPE(DATA_TYPE, 2)
+    vin1 = vload2(0, (__global DATA_TYPE *)in1.ptr);
+    VEC_DATA_TYPE(DATA_TYPE, 2)
+    vin2 = vload2(0, (__global DATA_TYPE *)in2.ptr);
 
     // Perform complex multiplication
-    float2 res = { vin1.x *vin2.x - vin1.y * vin2.y, vin1.x *vin2.y + vin2.x * vin1.y };
+    VEC_DATA_TYPE(DATA_TYPE, 2)
+    res = { vin1.x *vin2.x - vin1.y * vin2.y, vin1.x *vin2.y + vin2.x * vin1.y };
 
 #if defined(ACTIVATION_TYPE)
-    vstore2(ACTIVATION(ACTIVATION_TYPE, float, VEC_SIZE, res, A_VAL, B_VAL), 0, (__global float *)out.ptr);
+    vstore2(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, res, A_VAL, B_VAL), 0, (__global DATA_TYPE *)out.ptr);
 #else // defined(ACTIVATION_TYPE)
     // Store result
-    vstore2(res, 0, (__global float *)out.ptr);
+    vstore2(res, 0, (__global DATA_TYPE *)out.ptr);
 #endif // defined(ACTIVATION_TYPE)
 }
+
+#endif // defined(DATA_TYPE)
\ No newline at end of file