From ec320d9fc418e2d95a3a38ce87233397535f467d Mon Sep 17 00:00:00 2001 From: Ramy Elgammal Date: Wed, 14 Dec 2022 09:20:09 +0000 Subject: Add Subtraction operator to Dynamic Fusion interface Partially-Resolves: COMPMID-5518 Change-Id: I8358784815bcac461d50e384fa7bc96f476d3983 Signed-off-by: Ramy Elgammal Signed-off-by: Jakub Sujak Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9045 Comments-Addressed: Arm Jenkins Reviewed-by: Viet-Hoa Do Reviewed-by: SiCong Li Tested-by: Arm Jenkins Dynamic-Fusion: SiCong Li Benchmark: Arm Jenkins --- Android.bp | 1 + .../dynamic_fusion/sketch/gpu/operators/GpuAdd.h | 2 +- .../dynamic_fusion/sketch/gpu/operators/GpuCast.h | 1 - .../sketch/gpu/operators/GpuReshape.h | 5 +- .../sketch/gpu/operators/GpuResize.h | 1 - .../dynamic_fusion/sketch/gpu/operators/GpuSub.h | 97 ++++++++ filelist.json | 1 + src/core/CL/cl_kernels/tile_helpers.h | 11 +- .../components/cl/ClComponentElementwiseBinary.cpp | 1 + src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp | 76 ++++++ .../cl/ClTemplateElementwiseBinary.cpp | 3 + tests/validation/CL/ArithmeticAddition.cpp | 3 + tests/validation/CL/ArithmeticSubtraction.cpp | 3 + tests/validation/CL/PixelWiseMultiplication.cpp | 3 + tests/validation/dynamic_fusion/gpu/cl/Add.cpp | 2 +- tests/validation/dynamic_fusion/gpu/cl/Sub.cpp | 259 +++++++++++++++++++++ 16 files changed, 461 insertions(+), 8 deletions(-) create mode 100644 arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSub.h create mode 100644 src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp create mode 100644 tests/validation/dynamic_fusion/gpu/cl/Sub.cpp diff --git a/Android.bp b/Android.bp index 78aa64a5f9..23b19c2862 100644 --- a/Android.bp +++ b/Android.bp @@ -625,6 +625,7 @@ cc_library_static { "src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp", "src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp", "src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp", 
"src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp", "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp", "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp", diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h index 796fd6f83a..33eded4dff 100644 --- a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h +++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h @@ -79,7 +79,7 @@ public: static Status is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *lhs, const ITensorInfo *rhs); - /** Validate the operator and check if the its configuration is supported and if it can be fused into the workload sketch. + /** Validate the operator and check if its configuration is supported and if it can be fused into the workload sketch. * * Parameters are similar to @ref GpuAdd::create_op() * diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuCast.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuCast.h index 1ba05ae5b8..83b004b8b8 100644 --- a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuCast.h +++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuCast.h @@ -24,7 +24,6 @@ #ifndef ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUCAST #define ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUCAST -#include "arm_compute/core/ITensorInfo.h" #include "arm_compute/dynamic_fusion/sketch/attributes/CastAttributes.h" namespace arm_compute diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuReshape.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuReshape.h index 69c7a3a76a..0f50127199 100644 --- a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuReshape.h +++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuReshape.h @@ -24,10 +24,13 @@ #ifndef ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPURESHAPE #define 
ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPURESHAPE -#include "arm_compute/core/ITensorInfo.h" #include "arm_compute/dynamic_fusion/sketch/attributes/ReshapeAttributes.h" + namespace arm_compute { +/** Forward declaration */ +class ITensorInfo; + namespace experimental { namespace dynamic_fusion diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuResize.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuResize.h index f9661c1c24..2579d10f5b 100644 --- a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuResize.h +++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuResize.h @@ -25,7 +25,6 @@ #ifndef ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPURESIZE #define ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPURESIZE -#include "arm_compute/core/ITensorInfo.h" #include "arm_compute/dynamic_fusion/sketch/attributes/ResizeAttributes.h" namespace arm_compute diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSub.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSub.h new file mode 100644 index 0000000000..6f8c2d0b76 --- /dev/null +++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSub.h @@ -0,0 +1,97 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. 
+ * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUSUB +#define ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUSUB + +#include "arm_compute/core/Error.h" + +namespace arm_compute +{ +/** Forward declaration */ +class ITensorInfo; + +namespace experimental +{ +namespace dynamic_fusion +{ +/** Forward declaration */ +class GpuWorkloadContext; +class GpuWorkloadSketch; + +/** Operator interface. */ +class GpuSub final +{ +public: + /** Create an operator and fuse it into the workload sketch. + * @note If @ref validate_op() fails, the creation also fails and may throw an error. + * @note If @ref validate_op() fails, @p sketch remains unchanged and valid. + * + * Valid data type configurations: + * |lhs |rhs |dst | + * |:--------------|:--------------|:-------------| + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | + * |S32 |S32 |S32 | + * |S16 |S16 |S16 | + * |U8 |U8 |U8 | + * + * Valid data layouts: + * - Any + * + * @param[in,out] sketch Workload sketch into which the operator will be fused + * @param[in] lhs Left hand side tensor info. Data types supported: U8/S16/S32/F16/F32. + * @param[in] rhs Right hand side tensor info. Same as @p lhs. 
 + * + * @return Pointer for the destination tensor info + */ + static ITensorInfo *create_op(GpuWorkloadSketch &sketch, + ITensorInfo *lhs, + ITensorInfo *rhs); + + /** Check if the operator configuration is supported, irrespective of fusion + * + * @param[in] context Workload context within which the operator is running + * @param[in] lhs Left hand side tensor info. Data types supported: U8/S16/S32/F16/F32. + * @param[in] rhs Right hand side tensor info. Same as @p lhs. + * + * @return Status + */ + static Status is_supported_op(const GpuWorkloadContext &context, + const ITensorInfo *lhs, + const ITensorInfo *rhs); + + /** Validate the operator and check if its configuration is supported and if it can be fused into the workload sketch. + * + * Parameters are similar to @ref GpuSub::create_op() + * + * @return Status + */ + static Status validate_op(const GpuWorkloadSketch &sketch, + const ITensorInfo *lhs, + const ITensorInfo *rhs); +}; +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute +#endif /* ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUSUB */ diff --git a/filelist.json b/filelist.json index a66d2a3384..aec4fa8188 100644 --- a/filelist.json +++ b/filelist.json @@ -2241,6 +2241,7 @@ "src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp", "src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp", "src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp", "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp", "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp", "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp", diff --git a/src/core/CL/cl_kernels/tile_helpers.h b/src/core/CL/cl_kernels/tile_helpers.h index baee0d561b..1e4dddd2db 100644 --- a/src/core/CL/cl_kernels/tile_helpers.h +++ b/src/core/CL/cl_kernels/tile_helpers.h @@ -21,11 +21,11 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE 
OR OTHER DEALINGS IN THE * SOFTWARE. */ +#ifndef SRC_CORE_CL_CL_KERNELS_TILE_HELPERS +#define SRC_CORE_CL_CL_KERNELS_TILE_HELPERS // *INDENT-OFF* // clang-format off -#ifndef ARM_COMPUTE_TILE_HELPERS_H -#define ARM_COMPUTE_TILE_HELPERS_H #define TILE_VECTOR_SIZE1 1 #define TILE_VECTOR_SIZE2 2 @@ -1062,6 +1062,7 @@ #define ACTIVATION_QUANTIZED(op, DATA_TYPE, VEC_SIZE, ZERO_VALUE, A_VAL, B_VAL, x) ACT_OP_QUANTIZED(op, DATA_TYPE, VEC_SIZE, ZERO_VALUE, A_VAL, B_VAL, x) #define V_ADD(A_VAL, B_VAL) ((A_VAL) + (B_VAL)) +#define V_SUB(A_VAL, B_VAL) ((A_VAL) - (B_VAL)) #define V_DIV(A_VAL, B_VAL) ((A_VAL) / (B_VAL)) #define V_MUL(A_VAL, B_VAL) ((A_VAL) * (B_VAL)) @@ -1129,6 +1130,9 @@ #define T_ELTWISE_BROADCAST_LHS_X_ADD(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE_BROADCAST_LHS_X(V_ADD, DST_DATA_TYPE, M0, N0, lhs, rhs, dst) #define T_ELTWISE_BROADCAST_RHS_X_ADD(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE_BROADCAST_X(V_ADD, DST_DATA_TYPE, M0, N0, lhs, rhs, dst) +#define T_ELTWISE_BROADCAST_LHS_X_SUB(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE_BROADCAST_LHS_X(V_SUB, DST_DATA_TYPE, M0, N0, lhs, rhs, dst) +#define T_ELTWISE_BROADCAST_RHS_X_SUB(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE_BROADCAST_X(V_SUB, DST_DATA_TYPE, M0, N0, lhs, rhs, dst) + #define T_ELTWISE_BROADCAST_DIV_X(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE_BROADCAST_X(V_DIV, DST_DATA_TYPE, M0, N0, lhs, rhs, dst) #define T_ELTWISE_BROADCAST_LHS_X_MUL(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE_BROADCAST_LHS_X(V_MUL, DST_DATA_TYPE, M0, N0, lhs, rhs, dst) @@ -1196,6 +1200,7 @@ }) #define T_ELTWISE_ADD(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE(V_ADD, DST_DATA_TYPE, M0, N0, lhs, rhs, dst) +#define T_ELTWISE_SUB(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE(V_SUB, DST_DATA_TYPE, M0, N0, lhs, rhs, dst) #define T_ELTWISE_DIV(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE(V_DIV, DST_DATA_TYPE, M0, N0, lhs, rhs, dst) #define T_ELTWISE_MUL(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) 
T_ELTWISE(V_MUL, DST_DATA_TYPE, M0, N0, lhs, rhs, dst) @@ -1288,4 +1293,4 @@ }) \ }) -#endif // ARM_COMPUTE_TILE_HELPERS_H +#endif /* SRC_CORE_CL_CL_KERNELS_TILE_HELPERS */ diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp index 2611d6d575..b21c7c382f 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp @@ -38,6 +38,7 @@ namespace std::set supported_ops { ElementwiseBinaryCommonAttributes::ElementwiseOp::Add, + ElementwiseBinaryCommonAttributes::ElementwiseOp::Sub, ElementwiseBinaryCommonAttributes::ElementwiseOp::Mul }; } diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp new file mode 100644 index 0000000000..8240008f2a --- /dev/null +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp @@ -0,0 +1,76 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. 
IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSub.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" + +#include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h" + +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ +Status GpuSub::validate_op(const GpuWorkloadSketch &sketch, + const ITensorInfo *lhs, + const ITensorInfo *rhs) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8, DataType::S16, DataType::S32); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type"); + + // Set the elementwise operation to Sub then call the elementwise common validate_op + ElementwiseBinaryCommonAttributes common_attributes{}; + common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::Sub); + return GpuElementwiseBinaryCommon::validate_op(sketch, lhs, rhs, common_attributes); +} + +Status GpuSub::is_supported_op(const GpuWorkloadContext &context, + const ITensorInfo *lhs, + const ITensorInfo *rhs) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8, DataType::S16, DataType::S32); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type"); + + // Set the elementwise operation to Sub then call the elementwise common is_supported_op + ElementwiseBinaryCommonAttributes common_attributes{}; + 
common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::Sub); + return GpuElementwiseBinaryCommon::is_supported_op(context, lhs, rhs, common_attributes); +} + +ITensorInfo *GpuSub::create_op(GpuWorkloadSketch &sketch, + ITensorInfo *lhs, + ITensorInfo *rhs) +{ + // No need to log or validate as they'll be handled inside GpuElementwiseBinaryCommon::create_op() + // Set the elementwise operation to Sub then call the elementwise common create_op + ElementwiseBinaryCommonAttributes common_attributes{}; + common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::Sub); + return GpuElementwiseBinaryCommon::create_op(sketch, lhs, rhs, common_attributes); +} + +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp index 0dd7ca5e78..52164ba41d 100644 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp +++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp @@ -185,6 +185,9 @@ TagLUT ClTemplateElementwiseBinary::get_tag_lut(const GpuKernelVariableTable &vt case Attributes::ElementwiseOp::Add: lut["ELTWISE_OP"] = "ADD"; break; + case Attributes::ElementwiseOp::Sub: + lut["ELTWISE_OP"] = "SUB"; + break; case Attributes::ElementwiseOp::Mul: lut["ELTWISE_OP"] = "MUL"; break; diff --git a/tests/validation/CL/ArithmeticAddition.cpp b/tests/validation/CL/ArithmeticAddition.cpp index 45632dc7e2..1ed3a105dc 100644 --- a/tests/validation/CL/ArithmeticAddition.cpp +++ b/tests/validation/CL/ArithmeticAddition.cpp @@ -41,6 +41,9 @@ namespace test { namespace validation { +/** Synced with tests/validation/dynamic_fusion/gpu/cl/Add.cpp from the dynamic fusion interface. 
+ * Please check there for any differences in the coverage + */ namespace { /** Input data sets **/ diff --git a/tests/validation/CL/ArithmeticSubtraction.cpp b/tests/validation/CL/ArithmeticSubtraction.cpp index 6a82471dfa..5825ce2e5d 100644 --- a/tests/validation/CL/ArithmeticSubtraction.cpp +++ b/tests/validation/CL/ArithmeticSubtraction.cpp @@ -41,6 +41,9 @@ namespace test { namespace validation { +/** Synced with tests/validation/dynamic_fusion/gpu/cl/Sub.cpp from the dynamic fusion interface. + * Please check there for any differences in the coverage + */ namespace { /** Input data sets **/ diff --git a/tests/validation/CL/PixelWiseMultiplication.cpp b/tests/validation/CL/PixelWiseMultiplication.cpp index 84aa2e7ee6..62ff15a37f 100644 --- a/tests/validation/CL/PixelWiseMultiplication.cpp +++ b/tests/validation/CL/PixelWiseMultiplication.cpp @@ -36,6 +36,9 @@ namespace test { namespace validation { +/** Synced with tests/validation/dynamic_fusion/gpu/cl/Mul.cpp from the dynamic fusion interface. + * Please check there for any differences in the coverage + */ namespace { namespace diff --git a/tests/validation/dynamic_fusion/gpu/cl/Add.cpp b/tests/validation/dynamic_fusion/gpu/cl/Add.cpp index 52ba0520ad..afe5ee4da1 100644 --- a/tests/validation/dynamic_fusion/gpu/cl/Add.cpp +++ b/tests/validation/dynamic_fusion/gpu/cl/Add.cpp @@ -43,7 +43,7 @@ namespace validation { /* Synced with tests/validation/CL/ArithmeticAddition.cpp from the standard interface. * - * Difference | Why the difference + * Difference | Why the difference * No quantized tests | Not supported yet * No in place tests | Not supported yet * No activation tests | Not needed in dynamic fusion interface diff --git a/tests/validation/dynamic_fusion/gpu/cl/Sub.cpp b/tests/validation/dynamic_fusion/gpu/cl/Sub.cpp new file mode 100644 index 0000000000..977e0110da --- /dev/null +++ b/tests/validation/dynamic_fusion/gpu/cl/Sub.cpp @@ -0,0 +1,259 @@ +/* + * Copyright (c) 2023 Arm Limited. 
+ * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSub.h" + +#include "tests/CL/CLAccessor.h" +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" + +#include "tests/datasets/DynamicFusionDataset.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +/* Synced with tests/validation/CL/ArithmeticSubtraction.cpp from the standard interface. 
+ * + * Difference | Why the difference + * No quantized tests | Not supported yet + * No in place tests | Not supported yet + * No activation tests | Not needed in dynamic fusion interface + * + */ +TEST_SUITE(CL) +TEST_SUITE(DYNAMIC_FUSION) +TEST_SUITE(SUB) + +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip( + framework::dataset::make("LhsInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U32), // Unsupported data type U32 + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), // Unsupported data type QASYMM8 + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED), // Unsupported data type QASYMM8 + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Invalid data type combination + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), // S16 is valid data type for Sub + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32), // S32 is valid data type for Sub + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes + TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting allowed for lhs + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(15U, 23U, 3U), 1, DataType::F32), // Broadcast Y dimension is not allowed + TensorInfo(TensorShape( 3U, 8U, 9U), 1, DataType::S16), // Broadcast Z dimension is not allowed + TensorInfo(TensorShape(32U, 13U, 2U, 2), 1, DataType::F32), // Batching is allowed + }), + framework::dataset::make("RhsInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), + TensorInfo(TensorShape(32U, 13U, 2U), 1, 
DataType::S32), + TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting allowed for rhs + TensorInfo(TensorShape(15U, 1U, 3U), 1, DataType::F32), + TensorInfo(TensorShape( 3U, 8U, 1U), 1, DataType::S16), + TensorInfo(TensorShape(32U, 13U, 2U, 2), 1, DataType::F32), + })), + framework::dataset::make("Expected", { true, false, false, false, false, true, true, false, true, true, false, false, true })), + input1_info, input2_info, expected) +{ + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx }; + GpuWorkloadSketch sketch{ &gpu_ctx }; + + // Validate Elementwise Sub + auto lhs_info = sketch.create_tensor_info(input1_info); + auto rhs_info = sketch.create_tensor_info(input2_info); + + bool res = bool(GpuSub::validate_op(sketch, &lhs_info, &rhs_info)); + ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS); +} +// clang-format on +// *INDENT-ON* + +template +using DynamicFusionCLSubFixture = DynamicFusionGpuElementwiseBinaryOneOpValidationFixture; + +template +using DynamicFusionCLSubBroadcastFixture = DynamicFusionGpuElementwiseBinaryBroadcastOneOpValidationFixture; + +template +using DynamicFusionCLSubTwoOpsFixture = DynamicFusionGpuElementwiseBinaryTwoOpsValidationFixture; + +TEST_SUITE(FP32) +FIXTURE_DATA_TEST_CASE(RunSmallOneOp, + DynamicFusionCLSubFixture, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::SUB }), + datasets::SmallShapes()), + framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("InPlace", { false }))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLargeOneOp, + DynamicFusionCLSubFixture, + framework::DatasetMode::NIGHTLY, + 
combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::SUB }), + datasets::LargeShapes()), + framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("InPlace", { false }))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunSmallBroadcastOneOp, + DynamicFusionCLSubBroadcastFixture, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::SUB }), + datasets::TemporaryLimitedSmallShapesBroadcast()), + framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("InPlace", { false }))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} + +FIXTURE_DATA_TEST_CASE(RunLargeBroadcastOneOp, + DynamicFusionCLSubBroadcastFixture, + framework::DatasetMode::NIGHTLY, + combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::SUB }), + datasets::TemporaryLimitedLargeShapesBroadcast()), + framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("InPlace", { false }))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunSmallTwoOps, + DynamicFusionCLSubTwoOpsFixture, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::SUB }), + datasets::DynamicFusionElementwiseBinaryTwoOpsSmallShapes()), + framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("InPlace", { false })), + framework::dataset::make("FuseTwoOps", { true }))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +TEST_SUITE_END() // FP32 + +TEST_SUITE(FP16) +FIXTURE_DATA_TEST_CASE(RunSmallOneOp, + DynamicFusionCLSubFixture, + framework::DatasetMode::ALL, + combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::SUB }), + 
datasets::SmallShapes()), + framework::dataset::make("DataType", { DataType::F16 })), + framework::dataset::make("InPlace", { false }))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} + +FIXTURE_DATA_TEST_CASE(RunSmallBroadcastOneOp, + DynamicFusionCLSubBroadcastFixture, + framework::DatasetMode::ALL, + combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::SUB }), + datasets::TemporaryLimitedSmallShapesBroadcast()), + framework::dataset::make("DataType", { DataType::F16 })), + framework::dataset::make("InPlace", { false }))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} + +TEST_SUITE_END() // FP16 + +TEST_SUITE(S32) +FIXTURE_DATA_TEST_CASE(RunSmall, + DynamicFusionCLSubFixture, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::SUB }), + datasets::SmallShapes()), + framework::dataset::make("DataType", { DataType::S32 })), + framework::dataset::make("InPlace", { false }))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +TEST_SUITE_END() // S32 + +TEST_SUITE(S16) +FIXTURE_DATA_TEST_CASE(RunSmall, + DynamicFusionCLSubFixture, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::SUB }), + datasets::SmallShapes()), + framework::dataset::make("DataType", { DataType::S16 })), + framework::dataset::make("InPlace", { false }))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +FIXTURE_DATA_TEST_CASE(RunLarge, + DynamicFusionCLSubFixture, + framework::DatasetMode::NIGHTLY, + combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::SUB }), + datasets::LargeShapes()), + framework::dataset::make("DataType", { DataType::S16 })), + framework::dataset::make("InPlace", { false }))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +TEST_SUITE_END() 
// S16 + +TEST_SUITE(U8) +FIXTURE_DATA_TEST_CASE(RunSmall, + DynamicFusionCLSubFixture, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::SUB }), + datasets::SmallShapes()), + framework::dataset::make("DataType", { DataType::U8 })), + framework::dataset::make("InPlace", { false }))) +{ + // Validate output + validate(CLAccessor(_target), _reference); +} +TEST_SUITE_END() // U8 + +TEST_SUITE_END() // SUB +TEST_SUITE_END() // DYNAMIC_FUSION +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute -- cgit v1.2.1