aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
authorRamy Elgammal <ramy.elgammal@arm.com>2022-12-14 09:20:09 +0000
committerJakub Sujak <jakub.sujak@arm.com>2023-02-01 09:03:58 +0000
commitec320d9fc418e2d95a3a38ce87233397535f467d (patch)
treec0869324fa256ca559ef8b9f825393da727c3f7c
parent464ed2087c2ce2d2e741cc1e1dc4bd49d06e7d26 (diff)
downloadComputeLibrary-ec320d9fc418e2d95a3a38ce87233397535f467d.tar.gz
Add Subtraction operator to Dynamic Fusion interface
Partially-Resolves: COMPMID-5518 Change-Id: I8358784815bcac461d50e384fa7bc96f476d3983 Signed-off-by: Ramy Elgammal <ramy.elgammal@arm.com> Signed-off-by: Jakub Sujak <jakub.sujak@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/9045 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Viet-Hoa Do <viet-hoa.do@arm.com> Reviewed-by: SiCong Li <sicong.li@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Dynamic-Fusion: SiCong Li <sicong.li@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--Android.bp1
-rw-r--r--arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h2
-rw-r--r--arm_compute/dynamic_fusion/sketch/gpu/operators/GpuCast.h1
-rw-r--r--arm_compute/dynamic_fusion/sketch/gpu/operators/GpuReshape.h5
-rw-r--r--arm_compute/dynamic_fusion/sketch/gpu/operators/GpuResize.h1
-rw-r--r--arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSub.h97
-rw-r--r--filelist.json1
-rw-r--r--src/core/CL/cl_kernels/tile_helpers.h11
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp1
-rw-r--r--src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp76
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp3
-rw-r--r--tests/validation/CL/ArithmeticAddition.cpp3
-rw-r--r--tests/validation/CL/ArithmeticSubtraction.cpp3
-rw-r--r--tests/validation/CL/PixelWiseMultiplication.cpp3
-rw-r--r--tests/validation/dynamic_fusion/gpu/cl/Add.cpp2
-rw-r--r--tests/validation/dynamic_fusion/gpu/cl/Sub.cpp259
16 files changed, 461 insertions, 8 deletions
diff --git a/Android.bp b/Android.bp
index 78aa64a5f9..23b19c2862 100644
--- a/Android.bp
+++ b/Android.bp
@@ -625,6 +625,7 @@ cc_library_static {
"src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp",
+ "src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp",
"src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp",
"src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp",
"src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp",
diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h
index 796fd6f83a..33eded4dff 100644
--- a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h
+++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h
@@ -79,7 +79,7 @@ public:
static Status is_supported_op(const GpuWorkloadContext &context,
const ITensorInfo *lhs,
const ITensorInfo *rhs);
- /** Validate the operator and check if the its configuration is supported and if it can be fused into the workload sketch.
+ /** Validate the operator and check if the its configuration is supported and if it can be fused into the workload sketch.
*
* Parameters are similar to @ref GpuAdd::create_op()
*
diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuCast.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuCast.h
index 1ba05ae5b8..83b004b8b8 100644
--- a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuCast.h
+++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuCast.h
@@ -24,7 +24,6 @@
#ifndef ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUCAST
#define ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUCAST
-#include "arm_compute/core/ITensorInfo.h"
#include "arm_compute/dynamic_fusion/sketch/attributes/CastAttributes.h"
namespace arm_compute
diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuReshape.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuReshape.h
index 69c7a3a76a..0f50127199 100644
--- a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuReshape.h
+++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuReshape.h
@@ -24,10 +24,13 @@
#ifndef ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPURESHAPE
#define ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPURESHAPE
-#include "arm_compute/core/ITensorInfo.h"
#include "arm_compute/dynamic_fusion/sketch/attributes/ReshapeAttributes.h"
+
namespace arm_compute
{
+/** Forward declaration */
+class ITensorInfo;
+
namespace experimental
{
namespace dynamic_fusion
diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuResize.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuResize.h
index f9661c1c24..2579d10f5b 100644
--- a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuResize.h
+++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuResize.h
@@ -25,7 +25,6 @@
#ifndef ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPURESIZE
#define ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPURESIZE
-#include "arm_compute/core/ITensorInfo.h"
#include "arm_compute/dynamic_fusion/sketch/attributes/ResizeAttributes.h"
namespace arm_compute
diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSub.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSub.h
new file mode 100644
index 0000000000..6f8c2d0b76
--- /dev/null
+++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSub.h
@@ -0,0 +1,97 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUSUB
+#define ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUSUB
+
+#include "arm_compute/core/Error.h"
+
+namespace arm_compute
+{
+/** Forward declaration */
+class ITensorInfo;
+
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Forward declaration */
+class GpuWorkloadContext;
+class GpuWorkloadSketch;
+
+/** Operator interface. */
+class GpuSub final
+{
+public:
+ /** Create an operator and fuse it into the workload sketch.
+ * @note If @ref validate_op() fails, the creation also fails and may throw an error.
+ * @note If @ref validate_op() fails, @p sketch remains unchanged and valid.
+ *
+ * Valid data type configurations:
+ * |lhs |rhs |dst |
+ * |:--------------|:--------------|:-------------|
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ * |S32 |S32 |S32 |
+ * |S16 |S16 |S16 |
+ * |U8 |U8 |U8 |
+ *
+ * Valid data layouts:
+ * - Any
+ *
+ * @param[in,out] sketch Workload sketch into which the operator will be fused
+ * @param[in] lhs Left hand side tensor info. Data types supported: U8/S16/S32/F16/F32.
+ * @param[in] rhs Right hand side tensor info. Same as @p lhs.
+ *
+ * @return Pointer for the destination tensor info
+ */
+ static ITensorInfo *create_op(GpuWorkloadSketch &sketch,
+ ITensorInfo *lhs,
+ ITensorInfo *rhs);
+
+ /** Check if the operator configuration is supported, irrespective of fusion
+ *
+ * @param[in] context Workload context within which the operator is running
+ * @param[in] lhs Left hand side tensor info. Data types supported: U8/S16/S32/F16/F32.
+ * @param[in] rhs Right hand side tensor info. Same as @p lhs.
+ *
+ * @return Status
+ */
+ static Status is_supported_op(const GpuWorkloadContext &context,
+ const ITensorInfo *lhs,
+ const ITensorInfo *rhs);
+
+ /** Validate the operator and check if its configuration is supported and if it can be fused into the workload sketch.
+ *
+ * Parameters are similar to @ref GpuSub::create_op()
+ *
+ * @return Status
+ */
+ static Status validate_op(const GpuWorkloadSketch &sketch,
+ const ITensorInfo *rhs,
+ const ITensorInfo *lhs);
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUSUB */
diff --git a/filelist.json b/filelist.json
index a66d2a3384..aec4fa8188 100644
--- a/filelist.json
+++ b/filelist.json
@@ -2241,6 +2241,7 @@
"src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp",
+ "src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp",
"src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp",
"src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp",
"src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp",
diff --git a/src/core/CL/cl_kernels/tile_helpers.h b/src/core/CL/cl_kernels/tile_helpers.h
index baee0d561b..1e4dddd2db 100644
--- a/src/core/CL/cl_kernels/tile_helpers.h
+++ b/src/core/CL/cl_kernels/tile_helpers.h
@@ -21,11 +21,11 @@
* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
* SOFTWARE.
*/
+#ifndef SRC_CORE_CL_CL_KERNELS_TILE_HELPERS
+#define SRC_CORE_CL_CL_KERNELS_TILE_HELPERS
// *INDENT-OFF*
// clang-format off
-#ifndef ARM_COMPUTE_TILE_HELPERS_H
-#define ARM_COMPUTE_TILE_HELPERS_H
#define TILE_VECTOR_SIZE1 1
#define TILE_VECTOR_SIZE2 2
@@ -1062,6 +1062,7 @@
#define ACTIVATION_QUANTIZED(op, DATA_TYPE, VEC_SIZE, ZERO_VALUE, A_VAL, B_VAL, x) ACT_OP_QUANTIZED(op, DATA_TYPE, VEC_SIZE, ZERO_VALUE, A_VAL, B_VAL, x)
#define V_ADD(A_VAL, B_VAL) ((A_VAL) + (B_VAL))
+#define V_SUB(A_VAL, B_VAL) ((A_VAL) - (B_VAL))
#define V_DIV(A_VAL, B_VAL) ((A_VAL) / (B_VAL))
#define V_MUL(A_VAL, B_VAL) ((A_VAL) * (B_VAL))
@@ -1129,6 +1130,9 @@
#define T_ELTWISE_BROADCAST_LHS_X_ADD(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE_BROADCAST_LHS_X(V_ADD, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)
#define T_ELTWISE_BROADCAST_RHS_X_ADD(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE_BROADCAST_X(V_ADD, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)
+#define T_ELTWISE_BROADCAST_LHS_X_SUB(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE_BROADCAST_LHS_X(V_SUB, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)
+#define T_ELTWISE_BROADCAST_RHS_X_SUB(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE_BROADCAST_X(V_SUB, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)
+
#define T_ELTWISE_BROADCAST_DIV_X(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE_BROADCAST_X(V_DIV, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)
#define T_ELTWISE_BROADCAST_LHS_X_MUL(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE_BROADCAST_LHS_X(V_MUL, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)
@@ -1196,6 +1200,7 @@
})
#define T_ELTWISE_ADD(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE(V_ADD, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)
+#define T_ELTWISE_SUB(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE(V_SUB, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)
#define T_ELTWISE_DIV(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE(V_DIV, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)
#define T_ELTWISE_MUL(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE(V_MUL, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)
@@ -1288,4 +1293,4 @@
}) \
})
-#endif // ARM_COMPUTE_TILE_HELPERS_H
+#endif /* SRC_CORE_CL_CL_KERNELS_TILE_HELPERS */
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp
index 2611d6d575..b21c7c382f 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp
@@ -38,6 +38,7 @@ namespace
std::set<ElementwiseBinaryCommonAttributes::ElementwiseOp> supported_ops
{
ElementwiseBinaryCommonAttributes::ElementwiseOp::Add,
+ ElementwiseBinaryCommonAttributes::ElementwiseOp::Sub,
ElementwiseBinaryCommonAttributes::ElementwiseOp::Mul
};
}
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp
new file mode 100644
index 0000000000..8240008f2a
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp
@@ -0,0 +1,76 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSub.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+
+#include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+Status GpuSub::validate_op(const GpuWorkloadSketch &sketch,
+ const ITensorInfo *lhs,
+ const ITensorInfo *rhs)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8, DataType::S16, DataType::S32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type");
+
+ // Set the elementwise operation to Sub then call the elementwise common validate_op
+ ElementwiseBinaryCommonAttributes common_attributes{};
+ common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::Sub);
+ return GpuElementwiseBinaryCommon::validate_op(sketch, lhs, rhs, common_attributes);
+}
+
+Status GpuSub::is_supported_op(const GpuWorkloadContext &context,
+ const ITensorInfo *lhs,
+ const ITensorInfo *rhs)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8, DataType::S16, DataType::S32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type");
+
+ // Set the elementwise operation to Sub then call the elementwise common is_supported_op
+ ElementwiseBinaryCommonAttributes common_attributes{};
+ common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::Sub);
+ return GpuElementwiseBinaryCommon::is_supported_op(context, lhs, rhs, common_attributes);
+}
+
+ITensorInfo *GpuSub::create_op(GpuWorkloadSketch &sketch,
+ ITensorInfo *lhs,
+ ITensorInfo *rhs)
+{
+ // No need to log or validate as they'll be handled inside GpuElementwiseBinaryCommon::create_op()
+ // Set the elementwise operation to Sub then call the elementwise common create_op
+ ElementwiseBinaryCommonAttributes common_attributes{};
+ common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::Sub);
+ return GpuElementwiseBinaryCommon::create_op(sketch, lhs, rhs, common_attributes);
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp
index 0dd7ca5e78..52164ba41d 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp
@@ -185,6 +185,9 @@ TagLUT ClTemplateElementwiseBinary::get_tag_lut(const GpuKernelVariableTable &vt
case Attributes::ElementwiseOp::Add:
lut["ELTWISE_OP"] = "ADD";
break;
+ case Attributes::ElementwiseOp::Sub:
+ lut["ELTWISE_OP"] = "SUB";
+ break;
case Attributes::ElementwiseOp::Mul:
lut["ELTWISE_OP"] = "MUL";
break;
diff --git a/tests/validation/CL/ArithmeticAddition.cpp b/tests/validation/CL/ArithmeticAddition.cpp
index 45632dc7e2..1ed3a105dc 100644
--- a/tests/validation/CL/ArithmeticAddition.cpp
+++ b/tests/validation/CL/ArithmeticAddition.cpp
@@ -41,6 +41,9 @@ namespace test
{
namespace validation
{
+/** Synced with tests/validation/dynamic_fusion/gpu/cl/Add.cpp from the dynamic fusion interface.
+ * Please check there for any differences in the coverage
+ */
namespace
{
/** Input data sets **/
diff --git a/tests/validation/CL/ArithmeticSubtraction.cpp b/tests/validation/CL/ArithmeticSubtraction.cpp
index 6a82471dfa..5825ce2e5d 100644
--- a/tests/validation/CL/ArithmeticSubtraction.cpp
+++ b/tests/validation/CL/ArithmeticSubtraction.cpp
@@ -41,6 +41,9 @@ namespace test
{
namespace validation
{
+/** Synced with tests/validation/dynamic_fusion/gpu/cl/Sub.cpp from the dynamic fusion interface.
+ * Please check there for any differences in the coverage
+ */
namespace
{
/** Input data sets **/
diff --git a/tests/validation/CL/PixelWiseMultiplication.cpp b/tests/validation/CL/PixelWiseMultiplication.cpp
index 84aa2e7ee6..62ff15a37f 100644
--- a/tests/validation/CL/PixelWiseMultiplication.cpp
+++ b/tests/validation/CL/PixelWiseMultiplication.cpp
@@ -36,6 +36,9 @@ namespace test
{
namespace validation
{
+/** Synced with tests/validation/dynamic_fusion/gpu/cl/Mul.cpp from the dynamic fusion interface.
+ * Please check there for any differences in the coverage
+ */
namespace
{
namespace
diff --git a/tests/validation/dynamic_fusion/gpu/cl/Add.cpp b/tests/validation/dynamic_fusion/gpu/cl/Add.cpp
index 52ba0520ad..afe5ee4da1 100644
--- a/tests/validation/dynamic_fusion/gpu/cl/Add.cpp
+++ b/tests/validation/dynamic_fusion/gpu/cl/Add.cpp
@@ -43,7 +43,7 @@ namespace validation
{
/* Synced with tests/validation/CL/ArithmeticAddition.cpp from the standard interface.
*
- * Difference | Why the difference
+ * Difference | Why the difference
* No quantized tests | Not supported yet
* No in place tests | Not supported yet
* No activation tests | Not needed in dynamic fusion interface
diff --git a/tests/validation/dynamic_fusion/gpu/cl/Sub.cpp b/tests/validation/dynamic_fusion/gpu/cl/Sub.cpp
new file mode 100644
index 0000000000..977e0110da
--- /dev/null
+++ b/tests/validation/dynamic_fusion/gpu/cl/Sub.cpp
@@ -0,0 +1,259 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSub.h"
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/framework/Fixture.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+
+#include "tests/datasets/DynamicFusionDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+/* Synced with tests/validation/CL/ArithmeticSubtraction.cpp from the standard interface.
+ *
+ * Difference | Why the difference
+ * No quantized tests | Not supported yet
+ * No in place tests | Not supported yet
+ * No activation tests | Not needed in dynamic fusion interface
+ *
+ */
+TEST_SUITE(CL)
+TEST_SUITE(DYNAMIC_FUSION)
+TEST_SUITE(SUB)
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(
+ framework::dataset::make("LhsInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U32), // Unsupported data type U32
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), // Unsupported data type QASYMM8
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED), // Unsupported data type QASYMM8
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Invalid data type combination
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), // S16 is valid data type for Sub
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32), // S32 is valid data type for Sub
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes
+ TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting allowed for lhs
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(15U, 23U, 3U), 1, DataType::F32), // Broadcast Y dimension is not allowed
+ TensorInfo(TensorShape( 3U, 8U, 9U), 1, DataType::S16), // Broadcast Z dimension is not allowed
+ TensorInfo(TensorShape(32U, 13U, 2U, 2), 1, DataType::F32), // Batching is allowed
+ }),
+ framework::dataset::make("RhsInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U32),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32),
+ TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting allowed for rhs
+ TensorInfo(TensorShape(15U, 1U, 3U), 1, DataType::F32),
+ TensorInfo(TensorShape( 3U, 8U, 1U), 1, DataType::S16),
+ TensorInfo(TensorShape(32U, 13U, 2U, 2), 1, DataType::F32),
+ })),
+ framework::dataset::make("Expected", { true, false, false, false, false, true, true, false, true, true, false, false, true })),
+ input1_info, input2_info, expected)
+{
+ // Create a new workload sketch
+ auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ auto gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx };
+ GpuWorkloadSketch sketch{ &gpu_ctx };
+
+ // Validate Elementwise Sub
+ auto lhs_info = sketch.create_tensor_info(input1_info);
+ auto rhs_info = sketch.create_tensor_info(input2_info);
+
+ bool res = bool(GpuSub::validate_op(sketch, &lhs_info, &rhs_info));
+ ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+template <typename T>
+using DynamicFusionCLSubFixture = DynamicFusionGpuElementwiseBinaryOneOpValidationFixture<CLTensor, CLAccessor, GpuSub, T>;
+
+template <typename T>
+using DynamicFusionCLSubBroadcastFixture = DynamicFusionGpuElementwiseBinaryBroadcastOneOpValidationFixture<CLTensor, CLAccessor, GpuSub, T>;
+
+template <typename T>
+using DynamicFusionCLSubTwoOpsFixture = DynamicFusionGpuElementwiseBinaryTwoOpsValidationFixture<CLTensor, CLAccessor, GpuSub, T>;
+
+TEST_SUITE(FP32)
+FIXTURE_DATA_TEST_CASE(RunSmallOneOp,
+ DynamicFusionCLSubFixture<float>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::SUB }),
+ datasets::SmallShapes()),
+ framework::dataset::make("DataType", { DataType::F32 })),
+ framework::dataset::make("InPlace", { false })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLargeOneOp,
+ DynamicFusionCLSubFixture<float>,
+ framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::SUB }),
+ datasets::LargeShapes()),
+ framework::dataset::make("DataType", { DataType::F32 })),
+ framework::dataset::make("InPlace", { false })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcastOneOp,
+ DynamicFusionCLSubBroadcastFixture<float>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::SUB }),
+ datasets::TemporaryLimitedSmallShapesBroadcast()),
+ framework::dataset::make("DataType", { DataType::F32 })),
+ framework::dataset::make("InPlace", { false })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLargeBroadcastOneOp,
+ DynamicFusionCLSubBroadcastFixture<float>,
+ framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::SUB }),
+ datasets::TemporaryLimitedLargeShapesBroadcast()),
+ framework::dataset::make("DataType", { DataType::F32 })),
+ framework::dataset::make("InPlace", { false })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunSmallTwoOps,
+ DynamicFusionCLSubTwoOpsFixture<float>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::SUB }),
+ datasets::DynamicFusionElementwiseBinaryTwoOpsSmallShapes()),
+ framework::dataset::make("DataType", { DataType::F32 })),
+ framework::dataset::make("InPlace", { false })),
+ framework::dataset::make("FuseTwoOps", { true })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // FP32
+
+TEST_SUITE(FP16)
+FIXTURE_DATA_TEST_CASE(RunSmallOneOp,
+ DynamicFusionCLSubFixture<half>,
+ framework::DatasetMode::ALL,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::SUB }),
+ datasets::SmallShapes()),
+ framework::dataset::make("DataType", { DataType::F16 })),
+ framework::dataset::make("InPlace", { false })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcastOneOp,
+ DynamicFusionCLSubBroadcastFixture<half>,
+ framework::DatasetMode::ALL,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::SUB }),
+ datasets::TemporaryLimitedSmallShapesBroadcast()),
+ framework::dataset::make("DataType", { DataType::F16 })),
+ framework::dataset::make("InPlace", { false })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+
+TEST_SUITE_END() // FP16
+
+TEST_SUITE(S32)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ DynamicFusionCLSubFixture<int32_t>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::SUB }),
+ datasets::SmallShapes()),
+ framework::dataset::make("DataType", { DataType::S32 })),
+ framework::dataset::make("InPlace", { false })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // S32
+
+TEST_SUITE(S16)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ DynamicFusionCLSubFixture<int16_t>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::SUB }),
+ datasets::SmallShapes()),
+ framework::dataset::make("DataType", { DataType::S16 })),
+ framework::dataset::make("InPlace", { false })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+FIXTURE_DATA_TEST_CASE(RunLarge,
+ DynamicFusionCLSubFixture<int16_t>,
+ framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::SUB }),
+ datasets::LargeShapes()),
+ framework::dataset::make("DataType", { DataType::S16 })),
+ framework::dataset::make("InPlace", { false })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // S16
+
+TEST_SUITE(U8)
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ DynamicFusionCLSubFixture<uint8_t>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::SUB }),
+ datasets::SmallShapes()),
+ framework::dataset::make("DataType", { DataType::U8 })),
+ framework::dataset::make("InPlace", { false })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference);
+}
+TEST_SUITE_END() // U8
+
+TEST_SUITE_END() // SUB
+TEST_SUITE_END() // DYNAMIC_FUSION
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute