diff options
-rw-r--r-- | src/core/CL/kernels/CLElementwiseOperationKernel.cpp | 102 | ||||
-rw-r--r-- | tests/validation/CL/ArithmeticDivision.cpp | 141 |
2 files changed, 94 insertions, 149 deletions
diff --git a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp index 5dc5b7e13f..37eeeb78bf 100644 --- a/src/core/CL/kernels/CLElementwiseOperationKernel.cpp +++ b/src/core/CL/kernels/CLElementwiseOperationKernel.cpp @@ -64,6 +64,29 @@ std::string generate_id_for_tuning_common(const std::string &kernel_name, const return config_id; } +Status validate_arguments_with_division_rules(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(&input1, &input2, &output); + ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(&input1); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&input1, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input1, &input2); + + const TensorShape out_shape = TensorShape::broadcast_shape(input1.tensor_shape(), input2.tensor_shape()); + + ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, "Inputs are not broadcast compatible"); + + // Validate in case of configured output + if(output.total_size() > 0) + { + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(&output, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(&input1, &output); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, output.tensor_shape(), 0), + "Wrong shape for output"); + } + + return Status{}; +} + Status validate_arguments_with_arithmetic_rules(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) { ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(&input1); @@ -119,6 +142,26 @@ CLBuildOptions generate_build_options_with_arithmetic_rules(const ITensorInfo &i return build_opts; } +std::pair<Status, Window> configure_window_arithmetic_common(const ValidRegion &valid_region, ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output) +{ + Window win = calculate_max_window(valid_region, Steps(num_elems_processed_per_iteration)); + Window win_input1 = win.broadcast_if_dimension_le_one(input1); + Window win_input2 = win.broadcast_if_dimension_le_one(input2); + + AccessWindowHorizontal input1_access(&input1, 0, num_elems_processed_per_iteration); + AccessWindowHorizontal input2_access(&input2, 0, num_elems_processed_per_iteration); + AccessWindowHorizontal output_access(&output, 0, num_elems_processed_per_iteration); + + bool window_changed = update_window_and_padding(win_input1, input1_access) + || update_window_and_padding(win_input2, input2_access) + || update_window_and_padding(win, output_access); + + output_access.set_valid_region(win, valid_region); + + Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; + return std::make_pair(err, win); +} + std::pair<Status, Window> validate_and_configure_window_for_arithmetic_operators(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output) { const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(input1, input2); @@ -140,22 +183,16 @@ std::pair<Status, Window> validate_and_configure_window_for_arithmetic_operators set_format_if_unknown(output, Format::F32); } - Window win = calculate_max_window(valid_region, Steps(num_elems_processed_per_iteration)); - Window win_input1 = win.broadcast_if_dimension_le_one(input1); - Window win_input2 = win.broadcast_if_dimension_le_one(input2); - - AccessWindowHorizontal input1_access(&input1, 0, num_elems_processed_per_iteration); - AccessWindowHorizontal input2_access(&input2, 0, num_elems_processed_per_iteration); - AccessWindowHorizontal output_access(&output, 0, num_elems_processed_per_iteration); - - bool window_changed = update_window_and_padding(win_input1, input1_access) - || update_window_and_padding(win_input2, input2_access) - || update_window_and_padding(win, output_access); - - output_access.set_valid_region(win, valid_region); + return configure_window_arithmetic_common(valid_region, input1, input2, output); +} - Status err = (window_changed) ? ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Insufficient Padding!") : Status{}; - return std::make_pair(err, win); +std::pair<Status, Window> validate_and_configure_window_for_division(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output) +{ + const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(input1, input2); + const TensorShape &out_shape = broadcast_pair.first; + const ValidRegion &valid_region = broadcast_pair.second; + auto_init_if_empty(output, out_shape, 1, input1.data_type()); + return configure_window_arithmetic_common(valid_region, input1, input2, output); } } // namespace @@ -306,19 +343,44 @@ void CLArithmeticOperationKernel::configure(ArithmeticOperation op, const ICLTen Status CLArithmeticOperationKernel::validate(ArithmeticOperation op, const ITensorInfo *input1, const ITensorInfo *input2, const ITensorInfo *output) { - ARM_COMPUTE_UNUSED(op); ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(input1, input2, output); - ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_with_arithmetic_rules(*input1, *input2, *output)); - ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window_for_arithmetic_operators(*input1->clone(), *input2->clone(), *output->clone()).first); + if(op == ArithmeticOperation::DIV) + { + // Division doesn't support integer arithmetic + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_with_division_rules(*input1, *input2, *output)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window_for_division(*input1->clone(), *input2->clone(), *output->clone()).first); + } + else + { + ARM_COMPUTE_RETURN_ON_ERROR(validate_arguments_with_arithmetic_rules(*input1, *input2, *output)); + ARM_COMPUTE_RETURN_ON_ERROR(validate_and_configure_window_for_arithmetic_operators(*input1->clone(), *input2->clone(), *output->clone()).first); + } + return Status{}; } std::pair<Status, Window> CLArithmeticOperationKernel::validate_and_configure_window(ITensorInfo &input1, ITensorInfo &input2, ITensorInfo &output) { - return validate_and_configure_window_for_arithmetic_operators(input1, input2, output); + if(_op == ArithmeticOperation::DIV) + { + // Division doesn't support integer arithmetic + return validate_and_configure_window_for_division(input1, input2, output); + } + else + { + return validate_and_configure_window_for_arithmetic_operators(input1, input2, output); + } } Status CLArithmeticOperationKernel::validate_arguments(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) { - return validate_arguments_with_arithmetic_rules(input1, input2, output); + if(_op == ArithmeticOperation::DIV) + { + // Division doesn't support integer arithmetic + return validate_arguments_with_division_rules(input1, input2, output); + } + else + { + return validate_arguments_with_arithmetic_rules(input1, input2, output); + } } CLBuildOptions CLArithmeticOperationKernel::generate_build_options(const ITensorInfo &input1, const ITensorInfo &input2, const ITensorInfo &output) diff --git a/tests/validation/CL/ArithmeticDivision.cpp b/tests/validation/CL/ArithmeticDivision.cpp index 87039d775f..330a999c24 100644 --- a/tests/validation/CL/ArithmeticDivision.cpp +++ b/tests/validation/CL/ArithmeticDivision.cpp @@ -48,13 +48,6 @@ RelativeTolerance<float> tolerance_fp16(0.001f); constexpr unsigned int num_elems_processed_per_iteration = 16; /** Input data sets **/ -const auto ArithmeticDivisionU8Dataset = combine(combine(framework::dataset::make("DataType", DataType::U8), framework::dataset::make("DataType", DataType::U8)), framework::dataset::make("DataType", - DataType::U8)); -const auto ArithmeticDivisionQASYMM8Dataset = combine(combine(framework::dataset::make("DataType", DataType::QASYMM8), framework::dataset::make("DataType", DataType::QASYMM8)), - framework::dataset::make("DataType", - DataType::QASYMM8)); -const auto ArithmeticDivisionS16Dataset = combine(combine(framework::dataset::make("DataType", { DataType::U8, DataType::S16 }), framework::dataset::make("DataType", DataType::S16)), - framework::dataset::make("DataType", DataType::S16)); const auto ArithmeticDivisionFP16Dataset = combine(combine(framework::dataset::make("DataType", DataType::F16), framework::dataset::make("DataType", DataType::F16)), framework::dataset::make("DataType", DataType::F16)); const auto ArithmeticDivisionFP32Dataset = combine(combine(framework::dataset::make("DataType", DataType::F32), framework::dataset::make("DataType", DataType::F32)), @@ -67,25 +60,25 @@ TEST_SUITE(ArithmeticDivision) // *INDENT-OFF* // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( - framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8), // Window shrink - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Invalid data type combination + framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Window shrink + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Invalid data type combination TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes }), - framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8), + framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16), + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32), })), - framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), - TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::U8), - TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), + framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32), })), - framework::dataset::make("Expected", { true, true, false, false, false})), + framework::dataset::make("Expected", { true, false, false, false, false})), input1_info, input2_info, output_info, expected) { ARM_COMPUTE_EXPECT(bool(CLArithmeticDivision::validate(&input1_info.clone()->set_is_resizable(false), &input2_info.clone()->set_is_resizable(false), &output_info.clone()->set_is_resizable(false))) == expected, framework::LogLevel::ERRORS); @@ -96,116 +89,6 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip( template <typename T> using CLArithmeticDivisionFixture = ArithmeticDivisionValidationFixture<CLTensor, CLAccessor, CLArithmeticDivision, T>; -TEST_SUITE(U8) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), - shape) -{ - // Create tensors - CLTensor ref_src1 = create_tensor<CLTensor>(shape, DataType::U8); - CLTensor ref_src2 = create_tensor<CLTensor>(shape, DataType::U8); - CLTensor dst = create_tensor<CLTensor>(shape, DataType::U8); - - // Create and Configure function - CLArithmeticDivision add; - add.configure(&ref_src1, &ref_src2, &dst); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), num_elems_processed_per_iteration).required_padding(); - validate(ref_src1.info()->padding(), padding); - validate(ref_src2.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} - -FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticDivisionFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), ArithmeticDivisionU8Dataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -TEST_SUITE_END() - -template <typename T> -using CLArithmeticDivisionQuantizedFixture = ArithmeticDivisionValidationQuantizedFixture<CLTensor, CLAccessor, CLArithmeticDivision, T>; - -TEST_SUITE(Quantized) -TEST_SUITE(QASYMM8) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), - shape) -{ - // Create tensors - CLTensor ref_src1 = create_tensor<CLTensor>(shape, DataType::QASYMM8); - CLTensor ref_src2 = create_tensor<CLTensor>(shape, DataType::QASYMM8); - CLTensor dst = create_tensor<CLTensor>(shape, DataType::QASYMM8); - - // Create and Configure function - CLArithmeticDivision add; - add.configure(&ref_src1, &ref_src2, &dst); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), num_elems_processed_per_iteration).required_padding(); - validate(ref_src1.info()->padding(), padding); - validate(ref_src2.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} - -FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticDivisionQuantizedFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(combine(datasets::SmallShapes(), - ArithmeticDivisionQASYMM8Dataset), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(5.f / 255.f, 20) })), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(2.f / 255.f, 10) })), - framework::dataset::make("QuantizationInfo", { QuantizationInfo(1.f / 255.f, 5) })) - - ) -{ - // Validate output - validate(CLAccessor(_target), _reference, tolerance_fp32, 0.01); -} -TEST_SUITE_END() -TEST_SUITE_END() - -TEST_SUITE(S16) -DATA_TEST_CASE(Configuration, framework::DatasetMode::ALL, combine(framework::dataset::concat(datasets::SmallShapes(), datasets::LargeShapes()), framework::dataset::make("DataType", { DataType::U8, DataType::S16 })), - shape, data_type) -{ - // Create tensors - CLTensor ref_src1 = create_tensor<CLTensor>(shape, data_type); - CLTensor ref_src2 = create_tensor<CLTensor>(shape, DataType::S16); - CLTensor dst = create_tensor<CLTensor>(shape, DataType::S16); - - // Create and Configure function - CLArithmeticDivision add; - add.configure(&ref_src1, &ref_src2, &dst); - - // Validate valid region - const ValidRegion valid_region = shape_to_valid_region(shape); - validate(dst.info()->valid_region(), valid_region); - - // Validate padding - const PaddingSize padding = PaddingCalculator(shape.x(), num_elems_processed_per_iteration).required_padding(); - validate(ref_src1.info()->padding(), padding); - validate(ref_src2.info()->padding(), padding); - validate(dst.info()->padding(), padding); -} - -FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticDivisionFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(datasets::SmallShapes(), ArithmeticDivisionS16Dataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} - -FIXTURE_DATA_TEST_CASE(RunLarge, CLArithmeticDivisionFixture<int16_t>, framework::DatasetMode::NIGHTLY, combine(datasets::LargeShapes(), ArithmeticDivisionS16Dataset)) -{ - // Validate output - validate(CLAccessor(_target), _reference); -} -TEST_SUITE_END() - TEST_SUITE(Float) TEST_SUITE(FP16) FIXTURE_DATA_TEST_CASE(RunSmall, CLArithmeticDivisionFixture<half>, framework::DatasetMode::ALL, combine(datasets::SmallShapes(), ArithmeticDivisionFP16Dataset)) |