diff options
author | Suhail Munshi <MohammedSuhail.Munshi@arm.com> | 2021-04-14 12:16:49 +0100 |
---|---|---|
committer | Mohmun02 <MohammedSuhail.Munshi@arm.com> | 2021-04-19 17:32:51 +0000 |
commit | ae1c9fee8bb6073abf2a419af3266f23dbab82a9 (patch) | |
tree | 33d8caab3c4334308878bc6a09c62e67124487a3 /src/core/gpu/cl | |
parent | fe7ae817755577be29f4c07aa27d8ef9e821da45 (diff) | |
download | ComputeLibrary-ae1c9fee8bb6073abf2a419af3266f23dbab82a9.tar.gz |
Added S32 Integer support to DIV operator in CLElementWiseOperations with Tests
Partially Resolves : COMPMID-3793
Signed-off-by: Suhail Munshi <MohammedSuhail.Munshi@arm.com>
Change-Id: I14d6884c34f33a6caee11fc1230f9d2d3ae6c4c1
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/5425
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Reviewed-by: Manuel Bottini <manuel.bottini@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src/core/gpu/cl')
-rw-r--r-- | src/core/gpu/cl/kernels/ClElementwiseKernel.cpp | 35 |
1 files changed, 33 insertions, 2 deletions
diff --git a/src/core/gpu/cl/kernels/ClElementwiseKernel.cpp b/src/core/gpu/cl/kernels/ClElementwiseKernel.cpp index 8f12eb2215..335ee9c392 100644 --- a/src/core/gpu/cl/kernels/ClElementwiseKernel.cpp +++ b/src/core/gpu/cl/kernels/ClElementwiseKernel.cpp @@ -98,6 +98,29 @@ Status validate_arguments_with_float_only_supported_rules(const ITensorInfo &src return Status{}; } +Status validate_arguments_divide_operation(const ITensorInfo* src1, const ITensorInfo* src2, const ITensorInfo* dst) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src1, src2, dst); + ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src1); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src1, 1, DataType::F16, DataType::F32, DataType::S32); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src1, src2); + + const TensorShape out_shape = TensorShape::broadcast_shape(src1->tensor_shape(), src2->tensor_shape()); + + ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, "Inputs are not broadcast compatible"); + + // Validate in case of configured dst + if(dst->total_size() > 0) + { + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst, 1, DataType::F16, DataType::F32, DataType::S32); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src1, dst); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, dst->tensor_shape(), 0), + "Wrong shape for dst"); + } + + return Status{}; +} + Status validate_arguments_with_arithmetic_rules(const ITensorInfo &src1, const ITensorInfo &src2, const ITensorInfo &dst) { ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(&src1); @@ -180,6 +203,8 @@ CLBuildOptions generate_build_options_with_arithmetic_rules(const ITensorInfo &s build_opts.add_option("-DSCALE_IN2=" + float_to_string_with_full_precision(iq2info.scale)); build_opts.add_option("-DSCALE_OUT=" + float_to_string_with_full_precision(oqinfo.scale)); } + build_opts.add_option_if(src1.data_type() == DataType::S32, "-DS32"); + return build_opts; } @@ -459,9 +484,15 @@ void ClArithmeticKernel::configure(const ClCompileContext &compile_context, Arit Status ClArithmeticKernel::validate(ArithmeticOperation op, const ITensorInfo *src1, const ITensorInfo *src2, const ITensorInfo *dst, const ActivationLayerInfo &act_info) { ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src1, src2, dst); - if(op == ArithmeticOperation::DIV || op == ArithmeticOperation::POWER) + if(op == ArithmeticOperation::DIV) { - // Division and Power operators don't support integer arithmetic + // Partial integer support S32/F32/F16 + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_divide_operation(src1, src2, dst)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window_for_division(*src1->clone(), *src2->clone(), *dst->clone()).first); + } + else if(op == ArithmeticOperation::POWER) + { + // Power operators doesn't support integer arithmetic ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_with_float_only_supported_rules(*src1, *src2, *dst)); ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window_for_division(*src1->clone(), *src2->clone(), *dst->clone()).first); } |