aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/cl_kernels/pixelwise_mul_float.cl
diff options
context:
space:
mode:
author: Giorgio Arena <giorgio.arena@arm.com> 2020-12-10 16:49:39 +0000
committer: Giorgio Arena <giorgio.arena@arm.com> 2020-12-14 13:58:17 +0000
commit: ea7de7babc319e2fa31c5e1c986e48d6c5370689 (patch)
tree: 2303791668c67eda76dfb14d07b912af1cb54a17 /src/core/CL/cl_kernels/pixelwise_mul_float.cl
parent: ec241b48ea7481e797285788fd68e5e1d42382bb (diff)
download: ComputeLibrary-ea7de7babc319e2fa31c5e1c986e48d6c5370689.tar.gz
Enable FFT for FP16
Resolves: COMPMID-4051
Change-Id: I0c0bf97212dd281c19d5081e6247e7dc0c23cd6b
Signed-off-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4687
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/pixelwise_mul_float.cl')
-rw-r--r-- src/core/CL/cl_kernels/pixelwise_mul_float.cl 19
1 files changed, 13 insertions, 6 deletions
diff --git a/src/core/CL/cl_kernels/pixelwise_mul_float.cl b/src/core/CL/cl_kernels/pixelwise_mul_float.cl
index 4fa1551b54..845e1c9860 100644
--- a/src/core/CL/cl_kernels/pixelwise_mul_float.cl
+++ b/src/core/CL/cl_kernels/pixelwise_mul_float.cl
@@ -105,9 +105,11 @@ __kernel void pixelwise_mul_float(
}
#endif /* defined(DATA_TYPE_IN1) && defined(DATA_TYPE_IN2) && defined(ACC_DATA_TYPE) && defined(DATA_TYPE_OUT) */
+#if defined(DATA_TYPE)
+
/** Performs a pixelwise multiplication of complex float values
*
- * @param[in] in1_ptr Pointer to the source image. Supported data types: F32
+ * @param[in] in1_ptr Pointer to the source image. Supported data types: F16/F32
* @param[in] in1_stride_x Stride of the source image in X dimension (in bytes)
* @param[in] in1_step_x in1_stride_x * number of elements along X processed per workitem(in bytes)
* @param[in] in1_stride_y Stride of the source image in Y dimension (in bytes)
@@ -143,16 +145,21 @@ __kernel void pixelwise_mul_complex(
Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out);
// Load data
- float2 vin1 = vload2(0, (__global float *)in1.ptr);
- float2 vin2 = vload2(0, (__global float *)in2.ptr);
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ vin1 = vload2(0, (__global DATA_TYPE *)in1.ptr);
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ vin2 = vload2(0, (__global DATA_TYPE *)in2.ptr);
// Perform complex multiplication
- float2 res = { vin1.x *vin2.x - vin1.y * vin2.y, vin1.x *vin2.y + vin2.x * vin1.y };
+ VEC_DATA_TYPE(DATA_TYPE, 2)
+ res = { vin1.x *vin2.x - vin1.y * vin2.y, vin1.x *vin2.y + vin2.x * vin1.y };
#if defined(ACTIVATION_TYPE)
- vstore2(ACTIVATION(ACTIVATION_TYPE, float, VEC_SIZE, res, A_VAL, B_VAL), 0, (__global float *)out.ptr);
+ vstore2(ACTIVATION(ACTIVATION_TYPE, DATA_TYPE, VEC_SIZE, res, A_VAL, B_VAL), 0, (__global DATA_TYPE *)out.ptr);
#else // defined(ACTIVATION_TYPE)
// Store result
- vstore2(res, 0, (__global float *)out.ptr);
+ vstore2(res, 0, (__global DATA_TYPE *)out.ptr);
#endif // defined(ACTIVATION_TYPE)
}
+
+#endif // defined(DATA_TYPE)
\ No newline at end of file