aboutsummaryrefslogtreecommitdiff
path: root/src/core/CL/cl_kernels/pixelwise_mul_float.cl
diff options
context:
space:
mode:
authorGeorgios Pinitas <georgios.pinitas@arm.com>2019-03-26 17:23:28 +0000
committerGian Marco Iodice <gianmarco.iodice@arm.com>2019-04-11 09:34:26 +0000
commit8be9148814b88e5b0cabd5a4d2b1f4ff470a8c1c (patch)
tree760658b8c7b8917379467bd3fc119a5502faa850 /src/core/CL/cl_kernels/pixelwise_mul_float.cl
parenta50e702289af66944e860eafc7f3b32f6c5f30be (diff)
downloadComputeLibrary-8be9148814b88e5b0cabd5a4d2b1f4ff470a8c1c.tar.gz
COMPMID-1959: Implements 2D FFT on OpenCL
Change-Id: I73cf3984a5463acc854c8a59dc2bd9a5234cd99c Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com> Reviewed-on: https://review.mlplatform.org/c/936 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/pixelwise_mul_float.cl')
-rw-r--r--src/core/CL/cl_kernels/pixelwise_mul_float.cl52
1 files changed, 50 insertions, 2 deletions
diff --git a/src/core/CL/cl_kernels/pixelwise_mul_float.cl b/src/core/CL/cl_kernels/pixelwise_mul_float.cl
index 9fa540e946..d0e04b2ffe 100644
--- a/src/core/CL/cl_kernels/pixelwise_mul_float.cl
+++ b/src/core/CL/cl_kernels/pixelwise_mul_float.cl
@@ -1,5 +1,5 @@
/*
- * Copyright (c) 2016-2018 ARM Limited.
+ * Copyright (c) 2016-2019 ARM Limited.
*
* SPDX-License-Identifier: MIT
*
@@ -94,4 +94,52 @@ __kernel void pixelwise_mul_float(
// Store result
vstore16(res, 0, (__global DATA_TYPE_OUT *)out.ptr);
}
-#endif /* defined(DATA_TYPE_IN1) && defined(DATA_TYPE_IN2) && defined(DATA_TYPE_RES) && defined(DATA_TYPE_OUT) */ \ No newline at end of file
+#endif /* defined(DATA_TYPE_IN1) && defined(DATA_TYPE_IN2) && defined(DATA_TYPE_RES) && defined(DATA_TYPE_OUT) */
+
+/** Performs a pixelwise multiplication of complex float values
+ *
+ * @param[in] in1_ptr Pointer to the source image. Supported data types: F32
+ * @param[in] in1_stride_x Stride of the source image in X dimension (in bytes)
+ * @param[in] in1_step_x in1_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] in1_stride_y Stride of the source image in Y dimension (in bytes)
+ * @param[in] in1_step_y in1_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] in1_stride_z Stride of the source image in Y dimension (in bytes)
+ * @param[in] in1_step_z in1_stride_z * number of elements along Y processed per workitem(in bytes)
+ * @param[in] in1_offset_first_element_in_bytes The offset of the first element in the source image
+ * @param[in] in2_ptr Pointer to the source image. Supported data types: same as @p in1_ptr
+ * @param[in] in2_stride_x Stride of the source image in X dimension (in bytes)
+ * @param[in] in2_step_x in2_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] in2_stride_y Stride of the source image in Y dimension (in bytes)
+ * @param[in] in2_step_y in2_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] in2_stride_z Stride of the source image in Y dimension (in bytes)
+ * @param[in] in2_step_z in2_stride_z * number of elements along Y processed per workitem(in bytes)
+ * @param[in] in2_offset_first_element_in_bytes The offset of the first element in the source image
+ * @param[out] out_ptr Pointer to the destination image. Supported data types: same as @p in1_ptr
+ * @param[in] out_stride_x Stride of the destination image in X dimension (in bytes)
+ * @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes)
+ * @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes)
+ * @param[in] out_step_y out_stride_y * number of elements along Y processed per workitem(in bytes)
+ * @param[in] out_stride_z Stride of the destination image in Y dimension (in bytes)
+ * @param[in] out_step_z out_stride_z * number of elements along Y processed per workitem(in bytes)
+ * @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination image
+ */
+__kernel void pixelwise_mul_complex(
+ TENSOR3D_DECLARATION(in1),
+ TENSOR3D_DECLARATION(in2),
+ TENSOR3D_DECLARATION(out))
+{
+ // Get pixels pointer
+ Tensor3D in1 = CONVERT_TO_TENSOR3D_STRUCT(in1);
+ Tensor3D in2 = CONVERT_TO_TENSOR3D_STRUCT(in2);
+ Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out);
+
+ // Load data
+ float2 vin1 = vload2(0, (__global float *)in1.ptr);
+ float2 vin2 = vload2(0, (__global float *)in2.ptr);
+
+ // Perform complex multiplication
+ float2 res = { vin1.x *vin2.x - vin1.y * vin2.y, vin1.x *vin2.y + vin2.x * vin1.y };
+
+ // Store result
+ vstore2(res, 0, (__global float *)out.ptr);
+}