From 75eea338eb232ebdafa2fb84d22e711b5f964785 Mon Sep 17 00:00:00 2001 From: Sang-Hoon Park Date: Fri, 13 Nov 2020 13:44:13 +0000 Subject: COMPMID-3961: Add Logical OR/AND/NOT operator on CL Change-Id: I612aeed6affa17624fb9044964dd59c41a5c9888 Signed-off-by: Sang-Hoon Park Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/4448 Reviewed-by: Pablo Marquez Tello Tested-by: Arm Jenkins Comments-Addressed: Arm Jenkins --- src/core/CL/CLKernelLibrary.cpp | 2 + src/core/CL/cl_kernels/elementwise_operation.cl | 5 ++ src/core/CL/cl_kernels/elementwise_unary.cl | 6 +- .../CL/kernels/CLElementWiseUnaryLayerKernel.cpp | 27 +++++++-- .../CL/kernels/CLElementwiseOperationKernel.cpp | 69 ++++++++++++++++++++++ src/core/CL/kernels/CLElementwiseOperationKernel.h | 44 ++++++++++++++ 6 files changed, 146 insertions(+), 7 deletions(-) (limited to 'src/core/CL') diff --git a/src/core/CL/CLKernelLibrary.cpp b/src/core/CL/CLKernelLibrary.cpp index 33f0da0aa1..ae8b879be3 100644 --- a/src/core/CL/CLKernelLibrary.cpp +++ b/src/core/CL/CLKernelLibrary.cpp @@ -156,6 +156,8 @@ const std::map CLKernelLibrary::_kernel_program_map = { "elementwise_operation_SQUARED_DIFF", "elementwise_operation.cl" }, { "elementwise_operation_POWER", "elementwise_operation.cl" }, { "elementwise_operation_PRELU", "elementwise_operation.cl" }, + { "elementwise_operation_AND", "elementwise_operation.cl" }, + { "elementwise_operation_OR", "elementwise_operation.cl" }, { "elementwise_operation_ADD_quantized", "elementwise_operation_quantized.cl" }, { "elementwise_operation_SUB_quantized", "elementwise_operation_quantized.cl" }, { "elementwise_operation_MAX_quantized", "elementwise_operation_quantized.cl" }, diff --git a/src/core/CL/cl_kernels/elementwise_operation.cl b/src/core/CL/cl_kernels/elementwise_operation.cl index f6c09b4ec7..ea25082a6c 100644 --- a/src/core/CL/cl_kernels/elementwise_operation.cl +++ b/src/core/CL/cl_kernels/elementwise_operation.cl @@ -40,6 +40,11 @@ #define POWER(x, y) pow(x, 
y) #define PRELU(x, y) (select(y * x, x, CONVERT((x > (DATA_TYPE_OUT)0), SELECT_VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE_OUT)))) +#if defined(VEC_SIZE_OUT) && defined(DATA_TYPE_OUT) +#define AND(x, y) (CONVERT(((x) && (y)), VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE_OUT)) & 1) +#define OR(x, y) (CONVERT(((x) || (y)), VEC_DATA_TYPE(DATA_TYPE_OUT, VEC_SIZE_OUT)) & 1) +#endif // defined(VEC_SIZE_OUT) && defined(DATA_TYPE_OUT) + #define OP_FUN_NAME_STR(op) elementwise_operation_##op #define OP_FUN_NAME(op) OP_FUN_NAME_STR(op) diff --git a/src/core/CL/cl_kernels/elementwise_unary.cl b/src/core/CL/cl_kernels/elementwise_unary.cl index 3e557c0550..54c81fb04e 100644 --- a/src/core/CL/cl_kernels/elementwise_unary.cl +++ b/src/core/CL/cl_kernels/elementwise_unary.cl @@ -41,9 +41,11 @@ // Calculate round (Cannot use round function as it rounds halfway cases away from zero). #if defined(VEC_SIZE) #define round_op(input) CONVERT(CONVERT_SAT_ROUND(input, VEC_DATA_TYPE(int, VEC_SIZE), rte), VEC_DATA_TYPE(DATA_TYPE, VEC_SIZE)) -#else // defined(VEC_SIZE +#else // defined(VEC_SIZE) #define round_op(input) CONVERT(CONVERT_SAT_ROUND(input, int, rte), DATA_TYPE) -#endif // defined(VEC_SIZE +#endif // defined(VEC_SIZE) +// Calculate logical NOT +#define logical_not_op(input) ((!(input)) & 0x1) /** Applies element wise unary operator in a tensor.
* diff --git a/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp b/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp index 38a7f1bae1..0a641adcd2 100644 --- a/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp +++ b/src/core/CL/kernels/CLElementWiseUnaryLayerKernel.cpp @@ -34,16 +34,30 @@ namespace arm_compute { namespace { -Status validate_arguments(const ITensorInfo &input, const ITensorInfo &output) +Status validate_arguments(const ITensorInfo &input, const ITensorInfo &output, const ElementWiseUnary op) { ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(&input); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input, 1, DataType::F16, DataType::F32); + if(op == ElementWiseUnary::LOGICAL_NOT) + { + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input, 1, DataType::U8); + } + else + { + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input, 1, DataType::F16, DataType::F32); + } // Validate in case of configured output if(output.total_size() > 0) { ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(&output); - ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&output, 1, DataType::F16, DataType::F32); + if(op == ElementWiseUnary::LOGICAL_NOT) + { + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&output, 1, DataType::U8); + } + else + { + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&output, 1, DataType::F16, DataType::F32); + } ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input, &output); ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(&input, &output); } @@ -63,7 +77,7 @@ void CLElementWiseUnaryLayerKernel::configure(const CLCompileContext &compile_co auto padding_info = get_padding_info({ input, output }); - ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input, *output)); + ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(*input, *output, op)); const std::string kernel_name = "elementwise_unary"; const int vec_size_x = 16 / output->element_size(); @@ -98,6 +112,9 @@ void CLElementWiseUnaryLayerKernel::configure(const
CLCompileContext &compile_co case ElementWiseUnary::ROUND: build_opts.add_option("-DOPERATION=round_op"); break; + case ElementWiseUnary::LOGICAL_NOT: + build_opts.add_option("-DOPERATION=logical_not_op"); + break; default: ARM_COMPUTE_ERROR("Not implemented"); } @@ -121,7 +138,7 @@ Status CLElementWiseUnaryLayerKernel::validate(const ITensorInfo *input, const I { ARM_COMPUTE_UNUSED(op); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input, output); - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*input, *output)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments(*input, *output, op)); return Status{}; } diff --git a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp index efb3fe79e3..47439e15ab 100644 --- a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp +++ b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp @@ -26,6 +26,7 @@ #include "arm_compute/core/CL/CLHelpers.h" #include "arm_compute/core/CL/ICLTensor.h" #include "src/core/CL/CLValidate.h" +#include "src/core/common/Validate.h" #include "src/core/helpers/AutoConfiguration.h" #include "src/core/helpers/WindowHelpers.h" #include "support/Cast.h" @@ -220,6 +221,18 @@ std::pair validate_and_configure_window_for_arithmetic_operators return configure_window_arithmetic_common(output); } +std::pair validate_and_configure_window_for_logical_binary_operators(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output) +{ + const std::pair broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(input1, input2); + const TensorShape &out_shape = broadcast_pair.first; + + set_shape_if_empty(output, out_shape); + set_data_type_if_unknown(output, DataType::U8); + + // The arithmetic utility functions can be shared + return configure_window_arithmetic_common(output); +} + std::pair validate_and_configure_window_for_division(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output) { const std::pair broadcast_pair =
ITensorInfo::broadcast_shape_and_valid_region(input1, input2); @@ -319,6 +332,62 @@ void CLElementwiseOperationKernel::run_op(ITensorPack &tensors, const Window &wi while(collapsed.slide_window_slice_3D(slice)); } +/** Logical binary */ +void CLLogicalBinaryKernel::configure(const CLCompileContext &compile_context, kernels::LogicalOperation op, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output) +{ + ARM_COMPUTE_ERROR_ON_NULLPTR(input1, input2, output); + ARM_COMPUTE_ERROR_THROW_ON(CLLogicalBinaryKernel::validate(op, input1, input2, output)); + _op = op; + configure_common(compile_context, input1, input2, output); +} + +Status CLLogicalBinaryKernel::validate(kernels::LogicalOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output) +{ + ARM_COMPUTE_UNUSED(op); + ARM_COMPUTE_ASSERT(op != kernels::LogicalOperation::Unknown && op != kernels::LogicalOperation::Not); + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output); + + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(input1, 1, DataType::U8); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(input1, input2); + + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_with_arithmetic_rules(*input1, *input2, *output)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window_for_logical_binary_operators(*input1->clone(), *input2->clone(), *output->clone()).first); + + return Status{}; +} + +std::string CLLogicalBinaryKernel::name() +{ + switch(_op) + { + case kernels::LogicalOperation::And: + return "AND"; + case kernels::LogicalOperation::Or: + return "OR"; + case kernels::LogicalOperation::Not: + /* fall through */ + default: + ARM_COMPUTE_ASSERT(false); + } + return ""; +} + +std::pair CLLogicalBinaryKernel::validate_and_configure_window(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output) +{ + return validate_and_configure_window_for_logical_binary_operators(input1, input2, output); +} + +CLBuildOptions
CLLogicalBinaryKernel::generate_build_options(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) +{ + // The arithmetic utility functions can be shared + return generate_build_options_with_arithmetic_rules(input1, input2, output, name()); +} + +std::string CLLogicalBinaryKernel::generate_id_for_tuning(const std::string &kernel_name, const ITensorInfo &input1, const ITensorInfo &output) +{ + return generate_id_for_tuning_common(kernel_name, input1, output); +} + /** Arithmetic operations with saturation*/ void CLSaturatedArithmeticOperationKernel::configure(ArithmeticOperation op, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output, const ConvertPolicy &policy, diff --git a/src/core/CL/kernels/CLElementwiseOperationKernel.h b/src/core/CL/kernels/CLElementwiseOperationKernel.h index 75030cf3a3..e24d1564a8 100644 --- a/src/core/CL/kernels/CLElementwiseOperationKernel.h +++ b/src/core/CL/kernels/CLElementwiseOperationKernel.h @@ -26,6 +26,7 @@ #include "arm_compute/core/Types.h" #include "src/core/CL/ICLKernel.h" +#include "src/core/KernelTypes.h" namespace arm_compute { @@ -99,6 +100,49 @@ private: ITensorInfo *_output; /**< Destination tensor info */ }; +class CLLogicalBinaryKernel : public CLElementwiseOperationKernel +{ +public: + /** Default constructor */ + CLLogicalBinaryKernel() = default; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLogicalBinaryKernel(const CLLogicalBinaryKernel &) = delete; + /** Prevent instances of this class from being copied (As this class contains pointers) */ + CLLogicalBinaryKernel &operator=(const CLLogicalBinaryKernel &) = delete; + /** Allow instances of this class to be moved */ + CLLogicalBinaryKernel(CLLogicalBinaryKernel &&) = default; + /** Allow instances of this class to be moved */ + CLLogicalBinaryKernel &operator=(CLLogicalBinaryKernel &&) = default; + /** Default destructor */ + ~CLLogicalBinaryKernel() = default; + /** Function to
configure kernel + * + * @param[in] compile_context The compile context to be used. + * @param[in] op Logical binary operation to be executed. + * @param[in] input1 First tensor input info. Data types supported: U8. + * @param[in] input2 Second tensor input info. Data types supported: U8. + * @param[in] output Output tensor info. Data types supported: U8. + */ + void configure(const CLCompileContext &compile_context, kernels::LogicalOperation op, ITensorInfo *input1, ITensorInfo *input2, ITensorInfo *output); + /** Static function to check if the given configuration is valid for this kernel + * + * @param[in] op Logical binary operation to be executed. + * @param[in] input1 First tensor input info. Data types supported: U8. + * @param[in] input2 Second tensor input info. Data types supported: U8. + * @param[in] output Output tensor info. Data types supported: U8. + */ + static Status validate(kernels::LogicalOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output); + +private: + // Inherited methods overridden: + std::string name() override; + std::pair validate_and_configure_window(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output) override; + CLBuildOptions generate_build_options(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) override; + std::string generate_id_for_tuning(const std::string &kernel_name, const ITensorInfo &input1, const ITensorInfo &output) override; + + kernels::LogicalOperation _op{ kernels::LogicalOperation::Unknown }; +}; + /** Addition operation */ class CLSaturatedArithmeticOperationKernel : public CLElementwiseOperationKernel { -- cgit v1.2.1