diff options
author | Georgios Pinitas <georgios.pinitas@arm.com> | 2019-03-26 17:23:28 +0000 |
---|---|---|
committer | Gian Marco Iodice <gianmarco.iodice@arm.com> | 2019-04-11 09:34:26 +0000 |
commit | 8be9148814b88e5b0cabd5a4d2b1f4ff470a8c1c (patch) | |
tree | 760658b8c7b8917379467bd3fc119a5502faa850 /src/core/CL/cl_kernels/pixelwise_mul_float.cl | |
parent | a50e702289af66944e860eafc7f3b32f6c5f30be (diff) | |
download | ComputeLibrary-8be9148814b88e5b0cabd5a4d2b1f4ff470a8c1c.tar.gz |
COMPMID-1959: Implements 2D FFT on OpenCL
Change-Id: I73cf3984a5463acc854c8a59dc2bd9a5234cd99c
Signed-off-by: Georgios Pinitas <georgios.pinitas@arm.com>
Reviewed-on: https://review.mlplatform.org/c/936
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Diffstat (limited to 'src/core/CL/cl_kernels/pixelwise_mul_float.cl')
-rw-r--r-- | src/core/CL/cl_kernels/pixelwise_mul_float.cl | 52 |
1 files changed, 50 insertions, 2 deletions
diff --git a/src/core/CL/cl_kernels/pixelwise_mul_float.cl b/src/core/CL/cl_kernels/pixelwise_mul_float.cl index 9fa540e946..d0e04b2ffe 100644 --- a/src/core/CL/cl_kernels/pixelwise_mul_float.cl +++ b/src/core/CL/cl_kernels/pixelwise_mul_float.cl @@ -1,5 +1,5 @@ /* - * Copyright (c) 2016-2018 ARM Limited. + * Copyright (c) 2016-2019 ARM Limited. * * SPDX-License-Identifier: MIT * @@ -94,4 +94,52 @@ __kernel void pixelwise_mul_float( // Store result vstore16(res, 0, (__global DATA_TYPE_OUT *)out.ptr); } -#endif /* defined(DATA_TYPE_IN1) && defined(DATA_TYPE_IN2) && defined(DATA_TYPE_RES) && defined(DATA_TYPE_OUT) */
\ No newline at end of file +#endif /* defined(DATA_TYPE_IN1) && defined(DATA_TYPE_IN2) && defined(DATA_TYPE_RES) && defined(DATA_TYPE_OUT) */ + +/** Performs a pixelwise multiplication of complex float values + * + * @param[in] in1_ptr Pointer to the source image. Supported data types: F32 + * @param[in] in1_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] in1_step_x in1_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] in1_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] in1_step_y in1_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] in1_stride_z Stride of the source image in Y dimension (in bytes) + * @param[in] in1_step_z in1_stride_z * number of elements along Y processed per workitem(in bytes) + * @param[in] in1_offset_first_element_in_bytes The offset of the first element in the source image + * @param[in] in2_ptr Pointer to the source image. Supported data types: same as @p in1_ptr + * @param[in] in2_stride_x Stride of the source image in X dimension (in bytes) + * @param[in] in2_step_x in2_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] in2_stride_y Stride of the source image in Y dimension (in bytes) + * @param[in] in2_step_y in2_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] in2_stride_z Stride of the source image in Y dimension (in bytes) + * @param[in] in2_step_z in2_stride_z * number of elements along Y processed per workitem(in bytes) + * @param[in] in2_offset_first_element_in_bytes The offset of the first element in the source image + * @param[out] out_ptr Pointer to the destination image. Supported data types: same as @p in1_ptr + * @param[in] out_stride_x Stride of the destination image in X dimension (in bytes) + * @param[in] out_step_x out_stride_x * number of elements along X processed per workitem(in bytes) + * @param[in] out_stride_y Stride of the destination image in Y dimension (in bytes) + * @param[in] out_step_y out_stride_y * number of elements along Y processed per workitem(in bytes) + * @param[in] out_stride_z Stride of the destination image in Y dimension (in bytes) + * @param[in] out_step_z out_stride_z * number of elements along Y processed per workitem(in bytes) + * @param[in] out_offset_first_element_in_bytes The offset of the first element in the destination image + */ +__kernel void pixelwise_mul_complex( + TENSOR3D_DECLARATION(in1), + TENSOR3D_DECLARATION(in2), + TENSOR3D_DECLARATION(out)) +{ + // Get pixels pointer + Tensor3D in1 = CONVERT_TO_TENSOR3D_STRUCT(in1); + Tensor3D in2 = CONVERT_TO_TENSOR3D_STRUCT(in2); + Tensor3D out = CONVERT_TO_TENSOR3D_STRUCT(out); + + // Load data + float2 vin1 = vload2(0, (__global float *)in1.ptr); + float2 vin2 = vload2(0, (__global float *)in2.ptr); + + // Perform complex multiplication + float2 res = { vin1.x *vin2.x - vin1.y * vin2.y, vin1.x *vin2.y + vin2.x * vin1.y }; + + // Store result + vstore2(res, 0, (__global float *)out.ptr); +} |