 Android.bp                                                                       |   1
 arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h                         |   4
 arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMul.h                         |  95
 filelist.json                                                                    |   3
 src/core/CL/cl_kernels/tile_helpers.h                                            |   5
 src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp     |   3
 src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp                               |  22
 src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp                               |  75
 src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h    |  20
 src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp |  29
 tests/validation/dynamic_fusion/gpu/cl/Add.cpp                                   | 157
 tests/validation/dynamic_fusion/gpu/cl/Mul.cpp                                   | 223
 tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h       |  41
 tests/validation/fixtures/dynamic_fusion/operators/MulFixture.h                  | 235
 utils/TypePrinter.h                                                              |  84
 15 files changed, 843 insertions(+), 154 deletions(-)
diff --git a/Android.bp b/Android.bp
index 34f722f6fe..78aa64a5f9 100644
--- a/Android.bp
+++ b/Android.bp
@@ -619,6 +619,7 @@ cc_library_static {
"src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp",
+ "src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp",
diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h
index 6ac5d4e500..796fd6f83a 100644
--- a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h
+++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h
@@ -25,10 +25,12 @@
#define ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUADD
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/ITensorInfo.h"
namespace arm_compute
{
+/** Forward declaration */
+class ITensorInfo;
+
namespace experimental
{
namespace dynamic_fusion
diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMul.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMul.h
new file mode 100644
index 0000000000..3e0ebdd96c
--- /dev/null
+++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMul.h
@@ -0,0 +1,95 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUMUL
+#define ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUMUL
+
+#include "arm_compute/core/Error.h"
+
+namespace arm_compute
+{
+/** Forward declaration */
+class ITensorInfo;
+
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Forward declaration */
+class GpuWorkloadContext;
+class GpuWorkloadSketch;
+
+/** Operator interface. */
+class GpuMul final
+{
+public:
+ /** Create an operator and fuse it into the workload sketch.
+ * @note If @ref validate_op() fails, the creation also fails and may throw an error.
+ * @note If @ref validate_op() fails, @p sketch remains unchanged and valid.
+ *
+ * Valid data type configurations:
+ * |lhs |rhs |dst |
+ * |:--------------|:--------------|:-------------|
+ * |F16 |F16 |F16 |
+ * |F32 |F32 |F32 |
+ *
+ * Valid data layouts:
+ * - Any
+ *
+ * @param[in,out] sketch Workload sketch into which the operator will be fused
+ * @param[in] lhs Left hand side tensor info. Data types supported: F16/F32.
+ * @param[in] rhs Right hand side tensor info. Data types supported: Same as @p lhs.
+ *
+ * @return Pointer to the destination tensor info
+ */
+ static ITensorInfo *create_op(GpuWorkloadSketch &sketch,
+ ITensorInfo *lhs,
+ ITensorInfo *rhs);
+
+ /** Check if the operator configuration is supported, irrespective of fusion
+ *
+ * @param[in] context Workload context within which the operator is running
+ * @param[in] lhs Left hand side tensor info. Data types supported: F16/F32.
+ * @param[in] rhs Right hand side tensor info. Data types supported: Same as @p lhs.
+ *
+ * @return Status
+ */
+ static Status is_supported_op(const GpuWorkloadContext &context,
+ const ITensorInfo *lhs,
+ const ITensorInfo *rhs);
+
+ /** Validate the operator and check if the configuration is supported and if it can be fused into the workload sketch.
+ *
+ * Parameters are similar to @ref GpuMul::create_op()
+ *
+ * @return Status
+ */
+ static Status validate_op(const GpuWorkloadSketch &sketch,
+ const ITensorInfo *lhs,
+ const ITensorInfo *rhs);
+};
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUMUL */
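For orientation, a minimal end-to-end usage sketch of this interface, assuming the CL backend setup used by the test fixtures later in this patch (tensor shapes and names are illustrative):

    // Build a sketch and fuse a single Mul into it (mirrors the fixtures below)
    auto              cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
    auto              gpu_ctx        = GpuWorkloadContext{ &cl_compile_ctx };
    GpuWorkloadSketch sketch{ &gpu_ctx };

    TensorInfo lhs_info = sketch.create_tensor_info(TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32));
    TensorInfo rhs_info = sketch.create_tensor_info(TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32));
    TensorInfo dst_info = sketch.create_tensor_info();

    ITensorInfo *ans_info = GpuMul::create_op(sketch, &lhs_info, &rhs_info);
    GpuOutput::create_op(sketch, ans_info, &dst_info); // mark the result as a workload output

    ClWorkloadRuntime runtime;
    runtime.configure(sketch);
    // ... initialise and allocate CLTensors from the TensorInfos, then:
    // runtime.run({ &t_lhs, &t_rhs, &t_dst });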
diff --git a/filelist.json b/filelist.json
index 3cb3a7a76f..a66d2a3384 100644
--- a/filelist.json
+++ b/filelist.json
@@ -2234,13 +2234,14 @@
"src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp",
+ "src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp",
+ "src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp",
"src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp",
- "src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp",
"src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp",
"src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp",
"src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp",
diff --git a/src/core/CL/cl_kernels/tile_helpers.h b/src/core/CL/cl_kernels/tile_helpers.h
index 507e172dfb..baee0d561b 100644
--- a/src/core/CL/cl_kernels/tile_helpers.h
+++ b/src/core/CL/cl_kernels/tile_helpers.h
@@ -1063,6 +1063,7 @@
#define V_ADD(A_VAL, B_VAL) ((A_VAL) + (B_VAL))
#define V_DIV(A_VAL, B_VAL) ((A_VAL) / (B_VAL))
+#define V_MUL(A_VAL, B_VAL) ((A_VAL) * (B_VAL))
/** Element-wise activation for quantized types
*
@@ -1130,6 +1131,9 @@
#define T_ELTWISE_BROADCAST_DIV_X(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE_BROADCAST_X(V_DIV, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)
+#define T_ELTWISE_BROADCAST_LHS_X_MUL(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE_BROADCAST_LHS_X(V_MUL, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)
+#define T_ELTWISE_BROADCAST_RHS_X_MUL(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE_BROADCAST_X(V_MUL, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)
+
/** Element-wise scale with a constant value
*
* @note Performs: LHS * constant = DST
@@ -1193,6 +1197,7 @@
#define T_ELTWISE_ADD(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE(V_ADD, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)
#define T_ELTWISE_DIV(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE(V_DIV, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)
+#define T_ELTWISE_MUL(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE(V_MUL, DST_DATA_TYPE, M0, N0, lhs, rhs, dst)
/** Element-wise operation between two tiles (LHS and RHS)
*
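For reference, a rough sketch of how the new multiply variants are meant to be invoked from a tiled kernel body, by analogy with the existing ADD/DIV variants (the tile names lhs/rhs/dst are illustrative):

    // dst = lhs * rhs over an M0 x N0 tile
    T_ELTWISE_MUL(DATA_TYPE, M0, N0, lhs, rhs, dst);

    // rhs collapsed to a single row and broadcast along the x dimension
    T_ELTWISE_BROADCAST_RHS_X_MUL(DATA_TYPE, M0, N0, lhs, rhs, dst);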
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp
index 9a218b3e75..2611d6d575 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp
@@ -37,7 +37,8 @@ namespace
{
std::set<ElementwiseBinaryCommonAttributes::ElementwiseOp> supported_ops
{
- ElementwiseBinaryCommonAttributes::ElementwiseOp::ADD
+ ElementwiseBinaryCommonAttributes::ElementwiseOp::Add,
+ ElementwiseBinaryCommonAttributes::ElementwiseOp::Mul
};
}
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp
index a02160cba8..e7ee1c10df 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp
@@ -22,8 +22,8 @@
* SOFTWARE.
*/
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h"
namespace arm_compute
@@ -36,9 +36,13 @@ Status GpuAdd::validate_op(const GpuWorkloadSketch &sketch,
const ITensorInfo *lhs,
const ITensorInfo *rhs)
{
- // Set the elementwise operation to ADD then call the elementwise common validate_op
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8, DataType::S16, DataType::S32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type");
+
+ // Set the elementwise operation to Add then call the elementwise common validate_op
ElementwiseBinaryCommonAttributes common_attributes{};
- common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::ADD);
+ common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::Add);
return GpuElementwiseBinaryCommon::validate_op(sketch, lhs, rhs, common_attributes);
}
@@ -46,9 +50,13 @@ Status GpuAdd::is_supported_op(const GpuWorkloadContext &context,
const ITensorInfo *lhs,
const ITensorInfo *rhs)
{
- // Set the elementwise operation to ADD then call the elementwise common is_supported_op
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8, DataType::S16, DataType::S32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type");
+
+ // Set the elementwise operation to Add then call the elementwise common is_supported_op
ElementwiseBinaryCommonAttributes common_attributes{};
- common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::ADD);
+ common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::Add);
return GpuElementwiseBinaryCommon::is_supported_op(context, lhs, rhs, common_attributes);
}
@@ -57,9 +65,9 @@ ITensorInfo *GpuAdd::create_op(GpuWorkloadSketch &sketch,
ITensorInfo *rhs)
{
// No need to log or validate as they'll be handled inside GpuElementwiseBinaryCommon::create_op()
- // Set the elementwise operation to ADD then call the elementwise common create_op
+ // Set the elementwise operation to Add then call the elementwise common create_op
ElementwiseBinaryCommonAttributes common_attributes{};
- common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::ADD);
+ common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::Add);
return GpuElementwiseBinaryCommon::create_op(sketch, lhs, rhs, common_attributes);
}
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp
new file mode 100644
index 0000000000..464a32cbad
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMul.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+
+#include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+Status GpuMul::validate_op(const GpuWorkloadSketch &sketch,
+ const ITensorInfo *lhs,
+ const ITensorInfo *rhs)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type");
+
+ // Set the elementwise operation to Mul then call the elementwise common validate_op
+ ElementwiseBinaryCommonAttributes common_attributes{};
+ common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::Mul);
+ return GpuElementwiseBinaryCommon::validate_op(sketch, lhs, rhs, common_attributes);
+}
+
+Status GpuMul::is_supported_op(const GpuWorkloadContext &context,
+ const ITensorInfo *lhs,
+ const ITensorInfo *rhs)
+{
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type");
+
+ // Set the elementwise operation to Mul then call the elementwise common is_supported_op
+ ElementwiseBinaryCommonAttributes common_attributes{};
+ common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::Mul);
+ return GpuElementwiseBinaryCommon::is_supported_op(context, lhs, rhs, common_attributes);
+}
+
+ITensorInfo *GpuMul::create_op(GpuWorkloadSketch &sketch,
+ ITensorInfo *lhs,
+ ITensorInfo *rhs)
+{
+ // Set the elementwise operation to Mul then call the elementwise common create_op
+ ElementwiseBinaryCommonAttributes common_attributes{};
+ common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::Mul);
+ return GpuElementwiseBinaryCommon::create_op(sketch, lhs, rhs, common_attributes);
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
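The three entry points are meant to be layered: is_supported_op() checks the configuration against a context only, validate_op() additionally checks fusion legality against a sketch, and create_op() performs the fusion. A minimal check-before-fuse flow, assuming a context and sketch set up as in the tests, might look like:

    // Sketch only: query support, validate against the sketch, then fuse
    if(bool(GpuMul::is_supported_op(gpu_ctx, &lhs_info, &rhs_info)) &&
       bool(GpuMul::validate_op(sketch, &lhs_info, &rhs_info)))
    {
        ITensorInfo *ans_info = GpuMul::create_op(sketch, &lhs_info, &rhs_info);
        // ans_info can feed another fused operator or a GpuOutput
    }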
diff --git a/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h b/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h
index cbefa379e6..0b58b6eb96 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h
+++ b/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h
@@ -25,11 +25,12 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_INTERNAL_GPUELEMENTWISEBINARYCOMMON
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/ITensorInfo.h"
namespace arm_compute
{
+/** Forward declaration */
class ITensorInfo;
+
namespace experimental
{
namespace dynamic_fusion
@@ -39,14 +40,15 @@ class ElementwiseBinaryCommonAttributes
public:
enum class ElementwiseOp
{
- ADD, /**< (x + y) */
- SUB, /**< (x - y) */
- DIV, /**< (x / y) */
- MIN, /**< Min(x, y) */
- MAX, /**< Max(x, y) */
- SQUARED_DIFF, /**< (x - y)^2 */
- POWER, /**< x ^ y */
- PRELU, /**< y*x if x < 0, x otherwise */
+ Add, /**< (x + y) */
+ Sub, /**< (x - y) */
+ Div, /**< (x / y) */
+ Mul, /**< (x * y) */
+ Min, /**< Min(x, y) */
+ Max, /**< Max(x, y) */
+ SquaredDiff, /**< (x - y)^2 */
+ Power, /**< x ^ y */
+ Prelu, /**< y*x if x < 0, x otherwise */
};
/** Set operation*/
ElementwiseBinaryCommonAttributes &operation(const ElementwiseBinaryCommonAttributes::ElementwiseOp &operation);
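The setter returns a reference to the attributes object, so calls can be chained; the operators above use it in two steps, as in GpuMul.cpp:

    ElementwiseBinaryCommonAttributes common_attributes{};
    common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::Mul);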
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp
index 01017ed909..0dd7ca5e78 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp
@@ -68,7 +68,7 @@ std::string ClTemplateElementwiseBinary::get_component_code(const ComponentGroup
code =
R"_(
- //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_OP ---------------------
+ //------------------ START KERNEL {{meta_kernel_id}} {{ELTWISE_OP}} ---------------------
)_";
if(is_root)
@@ -139,7 +139,7 @@ R"_(
code +=
R"_(
}
- //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_OP ---------------------
+ //------------------ END KERNEL {{meta_kernel_id}} {{ELTWISE_OP}} ---------------------
)_";
return code;
@@ -168,33 +168,34 @@ void ClTemplateElementwiseBinary::declare_variables(GpuKernelVariableTable &vtab
TagLUT ClTemplateElementwiseBinary::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
{
- TagLUT lut{};
+ TagLUT lut{};
// Local build options
lut["meta_kernel_id"] = id();
lut["DATA_TYPE"] = get_cl_type_from_data_type(_lhs->data_type());
// Arguments and global shared variables
- lut["lhs"] = vtable.get_variable(_lhs);
- lut["rhs"] = vtable.get_variable(_rhs);
- lut["dst"] = vtable.get_variable(_dst);
+ lut["lhs"] = vtable.get_variable(_lhs);
+ lut["rhs"] = vtable.get_variable(_rhs);
+ lut["dst"] = vtable.get_variable(_dst);
lut["arg_dst"] = vtable.get_variable(comp_group.get_any_dst_tensor());
switch(_attributes.operation())
{
- case Attributes::ElementwiseOp::ADD:
+ case Attributes::ElementwiseOp::Add:
lut["ELTWISE_OP"] = "ADD";
break;
+ case Attributes::ElementwiseOp::Mul:
+ lut["ELTWISE_OP"] = "MUL";
+ break;
default:
ARM_COMPUTE_ERROR("Arithmetic Operation not supported");
}
ARM_COMPUTE_ERROR_ON(
- comp_group.is_intermediate_tensor(_lhs) &&
- detail::have_different_dimensions(_lhs->tensor_shape(), _dst->tensor_shape(), 0));
+ comp_group.is_intermediate_tensor(_lhs) && detail::have_different_dimensions(_lhs->tensor_shape(), _dst->tensor_shape(), 0));
ARM_COMPUTE_ERROR_ON(
- comp_group.is_intermediate_tensor(_rhs) &&
- detail::have_different_dimensions(_rhs->tensor_shape(), _dst->tensor_shape(), 0));
+ comp_group.is_intermediate_tensor(_rhs) && detail::have_different_dimensions(_rhs->tensor_shape(), _dst->tensor_shape(), 0));
// Set broadcast parameters
// PRE: All tensors are broadcast-compatible
@@ -222,9 +223,9 @@ TagLUT ClTemplateElementwiseBinary::get_tag_lut(const GpuKernelVariableTable &vt
lut["rhs_m0"] = (rhs_broadcast_yz) ? "1" : "M0";
lut["rhs_start_ind_1"] = (rhs_broadcast_yz) ? "0" : "g_ind_1";
- lut["BROADCAST_OP"] = (lhs_broadcast_yz) ? "BROADCAST_LHS_X_" :
- (rhs_broadcast_yz) ? "BROADCAST_RHS_X_" :
- "";
+ lut["BROADCAST_OP"] = (lhs_broadcast_yz) ? "BROADCAST_LHS_X_" :
+ (rhs_broadcast_yz) ? "BROADCAST_RHS_X_" :
+ "";
return lut;
}
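Tying the two lookup entries together: the kernel template composes the tile_helpers macro name from the BROADCAST_OP and ELTWISE_OP tags. Inferred from the macros added in tile_helpers.h above (the composing template line itself is not shown in this hunk), a Mul with an x-broadcast rhs would resolve as:

    // {{BROADCAST_OP}} = "BROADCAST_RHS_X_", {{ELTWISE_OP}} = "MUL"
    // T_ELTWISE_{{BROADCAST_OP}}{{ELTWISE_OP}}  ->  T_ELTWISE_BROADCAST_RHS_X_MUL
    T_ELTWISE_BROADCAST_RHS_X_MUL(DATA_TYPE, M0, N0, lhs, rhs, dst);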
diff --git a/tests/validation/dynamic_fusion/gpu/cl/Add.cpp b/tests/validation/dynamic_fusion/gpu/cl/Add.cpp
index 0385407ad2..52ba0520ad 100644
--- a/tests/validation/dynamic_fusion/gpu/cl/Add.cpp
+++ b/tests/validation/dynamic_fusion/gpu/cl/Add.cpp
@@ -34,7 +34,6 @@
#include "tests/datasets/DynamicFusionDataset.h"
#include "tests/datasets/ShapeDatasets.h"
#include "tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h"
-#include "tests/validation/reference/ElementwiseOperations.h"
namespace arm_compute
{
@@ -42,6 +41,14 @@ namespace test
{
namespace validation
{
+/* Synced with tests/validation/CL/ArithmeticAddition.cpp from the standard interface.
+ *
+ * Difference | Why the difference
+ * No quantized tests | Not supported yet
+ * No in place tests | Not supported yet
+ * No activation tests | Not needed in dynamic fusion interface
+ *
+ */
TEST_SUITE(CL)
TEST_SUITE(DYNAMIC_FUSION)
TEST_SUITE(ADD)
@@ -49,29 +56,33 @@ TEST_SUITE(ADD)
// *INDENT-OFF*
// clang-format off
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(
- framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+ framework::dataset::make("LhsInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Invalid data type combination
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), // S16 is valid data type for Add
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32), // S32 is valid data type for Add
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes
TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting allowed for lhs
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), // Unsupported data type QASYMM8
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED), // Unsupported data type QASYMM8_SIGNED
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(15U, 23U, 3U), 1, DataType::F32), // Broadcast Y dimension is not allowed
TensorInfo(TensorShape( 3U, 8U, 9U), 1, DataType::S16), // Broadcast Z dimension is not allowed
TensorInfo(TensorShape(32U, 13U, 2U, 2), 1, DataType::F32), // Batching is allowed
}),
- framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+ framework::dataset::make("RhsInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32),
TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), // Unsupported data type QASYMM8
TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED), // Unsupported data type QASYMM8_SIGNED
TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting allowed for rhs
TensorInfo(TensorShape(15U, 1U, 3U), 1, DataType::F32),
TensorInfo(TensorShape( 3U, 8U, 1U), 1, DataType::S16),
TensorInfo(TensorShape(32U, 13U, 2U, 2), 1, DataType::F32),
})),
- framework::dataset::make("Expected", { true, false, true, true, false, true, true, false, false, true})),
+ framework::dataset::make("Expected", { true, false, true, true, false, true, false, false, true, false, false, true})),
input1_info, input2_info, expected)
{
// Create a new workload sketch
@@ -79,7 +90,7 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(
auto gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx };
GpuWorkloadSketch sketch{ &gpu_ctx };
- // Fuse Elementwise Add
+ // Validate Elementwise Add
auto lhs_info = sketch.create_tensor_info(input1_info);
auto rhs_info = sketch.create_tensor_info(input2_info);
@@ -89,59 +100,73 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(
// clang-format on
// *INDENT-ON*
-RelativeTolerance<float> tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
-RelativeTolerance<half_float::half> tolerance_f16(half_float::half(0.1)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
-constexpr float tolerance_num = 0.01f; /**< Tolerance number */
+constexpr AbsoluteTolerance<float> tolerance_f16(0.0001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
+constexpr AbsoluteTolerance<float> tolerance_f32(0.0001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
+constexpr float tolerance_num = 0.0001f; /**< Tolerance number */
template <typename T>
-using DynamicFusionAddOpFixture = DynamicFusionGpuElementwiseBinaryOneOpValidationFixture<CLTensor, CLAccessor, GpuAdd, T>;
+using DynamicFusionCLAddFixture = DynamicFusionGpuElementwiseBinaryOneOpValidationFixture<CLTensor, CLAccessor, GpuAdd, T>;
template <typename T>
-using DynamicFusionAddOpBroadcastFixture = DynamicFusionGpuElementwiseBinaryBroadcastOneOpValidationFixture<CLTensor, CLAccessor, GpuAdd, T>;
+using DynamicFusionCLAddBroadcastFixture = DynamicFusionGpuElementwiseBinaryBroadcastOneOpValidationFixture<CLTensor, CLAccessor, GpuAdd, T>;
template <typename T>
-using DynamicFusionGpuFuseTwoAddOpsFixture = DynamicFusionGpuElementwiseBinaryTwoOpsValidationFixture<CLTensor, CLAccessor, GpuAdd, T>;
+using DynamicFusionCLAddTwoOpsFixture = DynamicFusionGpuElementwiseBinaryTwoOpsValidationFixture<CLTensor, CLAccessor, GpuAdd, T>;
TEST_SUITE(FP32)
-FIXTURE_DATA_TEST_CASE(RunSmallOneOp, DynamicFusionAddOpFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(
- framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }),
- datasets::SmallShapesNoBatches()),
- framework::dataset::make("DataType", { DataType::F32 })),
- framework::dataset::make("InPlace", { false, true })))
+FIXTURE_DATA_TEST_CASE(RunSmallOneOp,
+ DynamicFusionCLAddFixture<float>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }),
+ datasets::SmallShapes()),
+ framework::dataset::make("DataType", { DataType::F32 })),
+ framework::dataset::make("InPlace", { false })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f32);
}
-FIXTURE_DATA_TEST_CASE(RunLargeOneOp, DynamicFusionAddOpFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine(
- framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }),
- datasets::LargeShapesNoBatches()),
- framework::dataset::make("DataType", { DataType::F32 })),
- framework::dataset::make("InPlace", { false, true })))
+FIXTURE_DATA_TEST_CASE(RunLargeOneOp,
+ DynamicFusionCLAddFixture<float>,
+ framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }),
+ datasets::LargeShapes()),
+ framework::dataset::make("DataType", { DataType::F32 })),
+ framework::dataset::make("InPlace", { false })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f32);
}
-FIXTURE_DATA_TEST_CASE(RunSmallBroadcastOneOp, DynamicFusionAddOpBroadcastFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }),
- datasets::TemporaryLimitedSmallShapesBroadcast()),
- framework::dataset::make("DataType", { DataType::F32 })),
- framework::dataset::make("InPlace", { false, true })))
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcastOneOp,
+ DynamicFusionCLAddBroadcastFixture<float>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }),
+ datasets::TemporaryLimitedSmallShapesBroadcast()),
+ framework::dataset::make("DataType", { DataType::F32 })),
+ framework::dataset::make("InPlace", { false })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f32);
}
-FIXTURE_DATA_TEST_CASE(RunLargeBroadcastOneOp, DynamicFusionAddOpBroadcastFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }),
- datasets::TemporaryLimitedLargeShapesBroadcast()),
- framework::dataset::make("DataType", { DataType::F32 })),
- framework::dataset::make("InPlace", { false, true })))
+FIXTURE_DATA_TEST_CASE(RunLargeBroadcastOneOp,
+ DynamicFusionCLAddBroadcastFixture<float>,
+ framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }),
+ datasets::TemporaryLimitedLargeShapesBroadcast()),
+ framework::dataset::make("DataType", { DataType::F32 })),
+ framework::dataset::make("InPlace", { false })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f32);
}
-FIXTURE_DATA_TEST_CASE(RunSmallTwoOps, DynamicFusionGpuFuseTwoAddOpsFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }),
- datasets::DynamicFusionElementwiseBinaryTwoOpsSmallShapes()),
- framework::dataset::make("DataType", { DataType::F32 })),
- framework::dataset::make("InPlace", { false })))
+FIXTURE_DATA_TEST_CASE(RunSmallTwoOps,
+ DynamicFusionCLAddTwoOpsFixture<float>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }),
+ datasets::DynamicFusionElementwiseBinaryTwoOpsSmallShapes()),
+ framework::dataset::make("DataType", { DataType::F32 })),
+ framework::dataset::make("InPlace", { false })),
+ framework::dataset::make("FuseTwoOps", { true })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f32);
@@ -149,19 +174,25 @@ FIXTURE_DATA_TEST_CASE(RunSmallTwoOps, DynamicFusionGpuFuseTwoAddOpsFixture<floa
TEST_SUITE_END() // FP32
TEST_SUITE(FP16)
-FIXTURE_DATA_TEST_CASE(RunSmallOneOp, DynamicFusionAddOpFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }),
- datasets::SmallShapesNoBatches()),
- framework::dataset::make("DataType", { DataType::F16 })),
- framework::dataset::make("InPlace", { false, true })))
+FIXTURE_DATA_TEST_CASE(RunSmallOneOp,
+ DynamicFusionCLAddFixture<half>,
+ framework::DatasetMode::ALL,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }),
+ datasets::SmallShapes()),
+ framework::dataset::make("DataType", { DataType::F16 })),
+ framework::dataset::make("InPlace", { false })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
}
-FIXTURE_DATA_TEST_CASE(RunSmallBroadcastOneOp, DynamicFusionAddOpBroadcastFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }),
- datasets::TemporaryLimitedSmallShapesBroadcast()),
- framework::dataset::make("DataType", { DataType::F16 })),
- framework::dataset::make("InPlace", { false })))
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcastOneOp,
+ DynamicFusionCLAddBroadcastFixture<half>,
+ framework::DatasetMode::ALL,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }),
+ datasets::TemporaryLimitedSmallShapesBroadcast()),
+ framework::dataset::make("DataType", { DataType::F16 })),
+ framework::dataset::make("InPlace", { false })))
{
// Validate output
validate(CLAccessor(_target), _reference, tolerance_f16, tolerance_num);
@@ -170,10 +201,13 @@ FIXTURE_DATA_TEST_CASE(RunSmallBroadcastOneOp, DynamicFusionAddOpBroadcastFixtur
TEST_SUITE_END() // FP16
TEST_SUITE(S32)
-FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionAddOpFixture<int32_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }),
- datasets::SmallShapesNoBatches()),
- framework::dataset::make("DataType", { DataType::S32 })),
- framework::dataset::make("InPlace", { false })))
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ DynamicFusionCLAddFixture<int32_t>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }),
+ datasets::SmallShapes()),
+ framework::dataset::make("DataType", { DataType::S32 })),
+ framework::dataset::make("InPlace", { false })))
{
// Validate output
validate(CLAccessor(_target), _reference);
@@ -181,18 +215,24 @@ FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionAddOpFixture<int32_t>, framework::
TEST_SUITE_END() // S32
TEST_SUITE(S16)
-FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionAddOpFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }),
- datasets::SmallShapesNoBatches()),
- framework::dataset::make("DataType", { DataType::S16 })),
- framework::dataset::make("InPlace", { false })))
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ DynamicFusionCLAddFixture<int16_t>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }),
+ datasets::SmallShapes()),
+ framework::dataset::make("DataType", { DataType::S16 })),
+ framework::dataset::make("InPlace", { false })))
{
// Validate output
validate(CLAccessor(_target), _reference);
}
-FIXTURE_DATA_TEST_CASE(RunLarge, DynamicFusionAddOpFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }),
- datasets::LargeShapesNoBatches()),
- framework::dataset::make("DataType", { DataType::S16 })),
- framework::dataset::make("InPlace", { false })))
+FIXTURE_DATA_TEST_CASE(RunLarge,
+ DynamicFusionCLAddFixture<int16_t>,
+ framework::DatasetMode::NIGHTLY,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }),
+ datasets::LargeShapes()),
+ framework::dataset::make("DataType", { DataType::S16 })),
+ framework::dataset::make("InPlace", { false })))
{
// Validate output
validate(CLAccessor(_target), _reference);
@@ -200,10 +240,13 @@ FIXTURE_DATA_TEST_CASE(RunLarge, DynamicFusionAddOpFixture<int16_t>, framework::
TEST_SUITE_END() // S16
TEST_SUITE(U8)
-FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionAddOpFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }),
- datasets::SmallShapesNoBatches()),
- framework::dataset::make("DataType", { DataType::U8 })),
- framework::dataset::make("InPlace", { false })))
+FIXTURE_DATA_TEST_CASE(RunSmall,
+ DynamicFusionCLAddFixture<uint8_t>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }),
+ datasets::SmallShapes()),
+ framework::dataset::make("DataType", { DataType::U8 })),
+ framework::dataset::make("InPlace", { false })))
{
// Validate output
validate(CLAccessor(_target), _reference);
diff --git a/tests/validation/dynamic_fusion/gpu/cl/Mul.cpp b/tests/validation/dynamic_fusion/gpu/cl/Mul.cpp
new file mode 100644
index 0000000000..a9e8f9c15f
--- /dev/null
+++ b/tests/validation/dynamic_fusion/gpu/cl/Mul.cpp
@@ -0,0 +1,223 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMul.h"
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/framework/Fixture.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+
+#include "tests/datasets/DynamicFusionDataset.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/validation/fixtures/dynamic_fusion/operators/MulFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+/* Synced with tests/validation/CL/PixelwiseMultiplication.cpp from the standard interface.
+ *
+ * Difference | Why the difference
+ * No integer tests | Not supported yet
+ * No quantized tests | Not supported yet
+ * No convert policy tests | Not needed as convert policy is ignored by floating types
+ * No scale tests | Not supported yet
+ * No rounding modes tests | Not supported yet
+ * No in place tests | Not supported yet
+ * No activation tests | Not needed in dynamic fusion interface
+ *
+ */
+namespace
+{
+constexpr AbsoluteTolerance<float> tolerance_f16(0.0001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */
+constexpr AbsoluteTolerance<float> tolerance_f32(0.0001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */
+} // namespace
+TEST_SUITE(CL)
+TEST_SUITE(DYNAMIC_FUSION)
+TEST_SUITE(MUL)
+
+// *INDENT-OFF*
+// clang-format off
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(
+ framework::dataset::make("LhsInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Invalid data type combination
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Unsupported data type U8
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S8), // Unsupported data type S8
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), // Unsupported data type S16
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32), // Unsupported data type S32
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), // Unsupported data type QASYMM8
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED), // Unsupported data type QASYMM8_SIGNED
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes
+ TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting allowed for lhs
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(15U, 23U, 3U), 1, DataType::F32), // Broadcast Y dimension is not allowed
+ TensorInfo(TensorShape( 3U, 8U, 9U), 1, DataType::F32), // Broadcast Z dimension is not allowed
+ TensorInfo(TensorShape(32U, 13U, 2U, 2), 1, DataType::F32), // Batching is allowed
+ }),
+ framework::dataset::make("RhsInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S8),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED),
+ TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting allowed for rhs
+ TensorInfo(TensorShape(15U, 1U, 3U), 1, DataType::F32),
+ TensorInfo(TensorShape( 3U, 8U, 1U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U, 2U, 2), 1, DataType::F32),
+ })),
+ framework::dataset::make("Expected", { true, true, false, false, false, false, false, false, false, false, true, true, false, false, true })),
+ input1_info, input2_info, expected)
+{
+ // Create a new workload sketch
+ auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ auto gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx };
+ GpuWorkloadSketch sketch{ &gpu_ctx };
+
+ // Validate Elementwise Mul
+ auto lhs_info = sketch.create_tensor_info(input1_info);
+ auto rhs_info = sketch.create_tensor_info(input2_info);
+
+ bool res = bool(GpuMul::validate_op(sketch, &lhs_info, &rhs_info));
+ ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+template <typename T>
+using DynamicFusionCLMulFixture = DynamicFusionMulOneOpValidationFixture<CLTensor, CLAccessor, GpuMul, T>;
+template <typename T>
+using DynamicFusionCLMulBroadcastFixture = DynamicFusionMulBroadcastValidationFixture<CLTensor, CLAccessor, GpuMul, T>;
+template <typename T>
+using DynamicFusionCLMulTwoOpsFixture = DynamicFusionMulTwoOpsValidationFixture<CLTensor, CLAccessor, GpuMul, T>;
+
+TEST_SUITE(F16)
+FIXTURE_DATA_TEST_CASE(RunSmallOneOp,
+ DynamicFusionCLMulFixture<half>,
+ framework::DatasetMode::ALL,
+ combine(combine(datasets::SmallShapes(),
+ framework::dataset::make("DataType", { DataType::F16 })),
+ framework::dataset::make("InPlace", { false })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcastOneOp,
+ DynamicFusionCLMulBroadcastFixture<half>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(datasets::TemporaryLimitedSmallShapesBroadcast(),
+ framework::dataset::make("DataType", { DataType::F16 })),
+ framework::dataset::make("InPlace", { false })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLargeBroadcastOneOp,
+ DynamicFusionCLMulBroadcastFixture<half>,
+ framework::DatasetMode::NIGHTLY,
+ combine(combine(datasets::TemporaryLimitedLargeShapesBroadcast(),
+ framework::dataset::make("DataType", { DataType::F16 })),
+ framework::dataset::make("InPlace", { false })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f16);
+}
+TEST_SUITE_END() // F16
+
+TEST_SUITE(F32)
+FIXTURE_DATA_TEST_CASE(RunSmallOneOp,
+ DynamicFusionCLMulFixture<float>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(datasets::SmallShapes(),
+ framework::dataset::make("DataType", { DataType::F32 })),
+ framework::dataset::make("InPlace", { false })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLargeOneOp,
+ DynamicFusionCLMulFixture<float>,
+ framework::DatasetMode::NIGHTLY,
+ combine(combine(datasets::LargeShapes(),
+ framework::dataset::make("DataType", { DataType::F32 })),
+ framework::dataset::make("InPlace", { false })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallBroadcastOneOp,
+ DynamicFusionCLMulBroadcastFixture<float>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(datasets::TemporaryLimitedSmallShapesBroadcast(),
+ framework::dataset::make("DataType", { DataType::F32 })),
+ framework::dataset::make("InPlace", { false })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunLargeBroadcastOneOp,
+ DynamicFusionCLMulBroadcastFixture<float>,
+ framework::DatasetMode::NIGHTLY,
+ combine(combine(datasets::TemporaryLimitedLargeShapesBroadcast(),
+ framework::dataset::make("DataType", { DataType::F32 })),
+ framework::dataset::make("InPlace", { false })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+
+FIXTURE_DATA_TEST_CASE(RunSmallTwoOps,
+ DynamicFusionCLMulTwoOpsFixture<float>,
+ framework::DatasetMode::PRECOMMIT,
+ combine(combine(combine(datasets::DynamicFusionElementwiseBinaryTwoOpsSmallShapes(),
+ framework::dataset::make("DataType", { DataType::F32 })),
+ framework::dataset::make("InPlace", { false })),
+ framework::dataset::make("FuseTwoOps", { true })))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance_f32);
+}
+TEST_SUITE_END() // F32
+
+TEST_SUITE_END() // MUL
+TEST_SUITE_END() // DYNAMIC_FUSION
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h
index faed610874..b0680c0e4a 100644
--- a/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h
+++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h
@@ -31,12 +31,9 @@
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h"
-#include "tests/CL/CLAccessor.h"
#include "tests/framework/Fixture.h"
#include "tests/framework/Macros.h"
-#include "tests/validation/Validation.h"
#include "tests/validation/reference/ElementwiseOperations.h"
-#include "tests/validation/reference/Permute.h"
using namespace arm_compute::experimental::dynamic_fusion;
@@ -51,12 +48,13 @@ class DynamicFusionGpuElementwiseBinaryValidationGenericFixture : public framewo
{
public:
template <typename...>
- void setup(ArithmeticOperation op, TensorShape shape0, TensorShape shape1, TensorShape shape2, const DataType data_type, const bool is_inplace)
+ void setup(ArithmeticOperation ref_op, const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2, DataType data_type, bool is_inplace, bool fuse_two_ops = false)
{
- _op = op;
+ _ref_op = ref_op;
_is_inplace = is_inplace;
_data_type = data_type;
- _fuse = shape2.total_size() != 0;
+ _fuse = fuse_two_ops;
+ ARM_COMPUTE_ERROR_ON_MSG(_fuse && shape2.total_size() == 0, "No shape2 provided for fusion of two ops.");
ARM_COMPUTE_ERROR_ON_MSG(_fuse && _is_inplace, "In place for fusing case not supported yet.");
_target = compute_target(shape0, shape1, shape2);
_reference = compute_reference(shape0, shape1, shape2);
@@ -68,7 +66,7 @@ protected:
{
if(is_data_type_float(tensor.data_type()))
{
- switch(_op)
+ switch(_ref_op)
{
case ArithmeticOperation::DIV:
library->fill_tensor_uniform_ranged(tensor, i, { std::pair<float, float>(-0.001f, 0.001f) });
@@ -82,7 +80,7 @@ protected:
}
else if(tensor.data_type() == DataType::S32)
{
- switch(_op)
+ switch(_ref_op)
{
case ArithmeticOperation::DIV:
library->fill_tensor_uniform_ranged(tensor, i, { std::pair<int32_t, int32_t>(-1U, 1U) });
@@ -97,7 +95,7 @@ protected:
}
}
- TensorType compute_target(TensorShape shape0, TensorShape shape1, TensorShape shape2)
+ TensorType compute_target(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2)
{
// Create a new workload sketch
auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
@@ -105,7 +103,7 @@ protected:
GpuWorkloadSketch sketch{ &gpu_ctx };
// Fuse first element wise binary Op
- TensorInfo lhs_info = sketch.create_tensor_info(shape0, 1, _data_type);
+ TensorInfo lhs_info = sketch.create_tensor_info(TensorInfo(shape0, 1, _data_type));
TensorInfo rhs_info = sketch.create_tensor_info(TensorInfo(shape1, 1, _data_type));
TensorInfo dst_info = sketch.create_tensor_info();
@@ -115,7 +113,7 @@ protected:
if(_fuse)
{
- rhs_info_fuse = sketch.create_tensor_info(shape2, 1, _data_type);
+ rhs_info_fuse = sketch.create_tensor_info(TensorInfo(shape2, 1, _data_type));
ITensorInfo *ans2_info = FunctionType::create_op(sketch, ans_info, &rhs_info_fuse);
GpuOutput::create_op(sketch, ans2_info, &dst_info);
}
@@ -183,7 +181,7 @@ protected:
return t_dst;
}
- SimpleTensor<T> compute_reference(TensorShape shape0, TensorShape shape1, TensorShape shape2)
+ SimpleTensor<T> compute_reference(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2)
{
const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1);
const TensorShape out_shape_fuse = TensorShape::broadcast_shape(out_shape, shape2);
@@ -194,21 +192,22 @@ protected:
SimpleTensor<T> ref_rhs_fuse{ shape2, _data_type, 1, QuantizationInfo() };
SimpleTensor<T> ref_dst{ out_shape, _data_type, 1, QuantizationInfo() };
SimpleTensor<T> ref_dst_fuse{ out_shape_fuse, _data_type, 1, QuantizationInfo() };
+
// Fill reference
fill(ref_lhs, 0);
fill(ref_rhs, 1);
- reference::arithmetic_operation<T>(_op, ref_lhs, ref_rhs, ref_dst, ConvertPolicy::WRAP);
+ reference::arithmetic_operation<T>(_ref_op, ref_lhs, ref_rhs, ref_dst, ConvertPolicy::WRAP);
if(_fuse)
{
fill(ref_rhs_fuse, 2);
- reference::arithmetic_operation<T>(_op, ref_dst, ref_rhs_fuse, ref_dst_fuse, ConvertPolicy::WRAP);
+ reference::arithmetic_operation<T>(_ref_op, ref_dst, ref_rhs_fuse, ref_dst_fuse, ConvertPolicy::WRAP);
}
SimpleTensor<T> *ret = _fuse ? &ref_dst_fuse : &ref_dst;
return *ret;
}
- ArithmeticOperation _op{ ArithmeticOperation::ADD };
+ ArithmeticOperation _ref_op{ ArithmeticOperation::ADD };
TensorType _target{};
SimpleTensor<T> _reference{};
DataType _data_type{};
@@ -222,9 +221,9 @@ class DynamicFusionGpuElementwiseBinaryOneOpValidationFixture : public DynamicFu
{
public:
template <typename...>
- void setup(ArithmeticOperation op, TensorShape shape, const DataType data_type, const bool is_inplace)
+ void setup(ArithmeticOperation ref_op, const TensorShape &shape0, DataType data_type, bool is_inplace)
{
- DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(op, shape, shape, TensorShape(), data_type, is_inplace);
+ DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ref_op, shape0, shape0, TensorShape(), data_type, is_inplace);
}
};
@@ -233,9 +232,9 @@ class DynamicFusionGpuElementwiseBinaryBroadcastOneOpValidationFixture : public
{
public:
template <typename...>
- void setup(ArithmeticOperation op, TensorShape shape0, TensorShape shape1, const DataType data_type, const bool is_inplace)
+ void setup(ArithmeticOperation ref_op, const TensorShape &shape0, const TensorShape &shape1, DataType data_type, bool is_inplace)
{
- DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(op, shape0, shape1, TensorShape(), data_type, is_inplace);
+ DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ref_op, shape0, shape1, TensorShape(), data_type, is_inplace);
}
};
@@ -244,9 +243,9 @@ class DynamicFusionGpuElementwiseBinaryTwoOpsValidationFixture : public DynamicF
{
public:
template <typename...>
- void setup(ArithmeticOperation op, TensorShape shape0, TensorShape shape1, TensorShape shape2, const DataType data_type, const bool is_inplace)
+ void setup(ArithmeticOperation ref_op, const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2, DataType data_type, bool is_inplace, bool fuse_two_ops)
{
- DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(op, shape0, shape1, shape2, data_type, is_inplace);
+ DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ref_op, shape0, shape1, shape2, data_type, is_inplace, fuse_two_ops);
}
};
diff --git a/tests/validation/fixtures/dynamic_fusion/operators/MulFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/MulFixture.h
new file mode 100644
index 0000000000..0530707c38
--- /dev/null
+++ b/tests/validation/fixtures/dynamic_fusion/operators/MulFixture.h
@@ -0,0 +1,235 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_MULFIXTURE
+#define TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_MULFIXTURE
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h"
+
+#include "tests/Globals.h"
+#include "tests/framework/Fixture.h"
+#include "tests/framework/Macros.h"
+#include "tests/validation/reference/PixelWiseMultiplication.h"
+
+using namespace arm_compute::experimental::dynamic_fusion;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+/* We use a separate test fixture for Multiplication op instead of reusing ElementwiseBinaryFixture to avoid exposing
+ * the internal enum ElementwiseOp to the public utils/TypePrinter.h as required by the data test case macros
+ * to print the test data.
+ */
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionMulValidationFixture : public framework::Fixture
+{
+public:
+ template <typename...>
+ void setup(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2, DataType data_type, bool is_inplace, bool fuse_two_ops = false)
+ {
+ _data_type = data_type;
+ _is_inplace = is_inplace;
+ _fuse = fuse_two_ops;
+ ARM_COMPUTE_ERROR_ON_MSG(_fuse && shape2.total_size() == 0, "No shape2 provided for fusion of two ops.");
+ ARM_COMPUTE_ERROR_ON_MSG(_fuse && _is_inplace, "In place for fusing case not supported yet.");
+ _target = compute_target(shape0, shape1, shape2);
+ _reference = compute_reference(shape0, shape1, shape2);
+ }
+
+protected:
+ template <typename U>
+ void fill(U &&tensor, int i)
+ {
+ library->fill_tensor_uniform(tensor, i);
+ }
+
+ TensorType compute_target(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2)
+ {
+ // Create a new workload sketch
+ auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ auto gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx };
+ GpuWorkloadSketch sketch{ &gpu_ctx };
+
+ // Fuse first multiplication op
+ TensorInfo lhs_info = sketch.create_tensor_info(TensorInfo(shape0, 1, _data_type));
+ TensorInfo rhs_info = sketch.create_tensor_info(TensorInfo(shape1, 1, _data_type));
+ TensorInfo dst_info = sketch.create_tensor_info();
+
+ TensorInfo rhs_info_fuse;
+
+ ITensorInfo *ans_info = FunctionType::create_op(sketch, &lhs_info, &rhs_info);
+
+ if(_fuse)
+ {
+ rhs_info_fuse = sketch.create_tensor_info(TensorInfo(shape2, 1, _data_type));
+ ITensorInfo *ans2_info = FunctionType::create_op(sketch, ans_info, &rhs_info_fuse);
+ GpuOutput::create_op(sketch, ans2_info, &dst_info);
+ }
+ else
+ {
+ GpuOutput::create_op(sketch, ans_info, &dst_info);
+ }
+
+ // Configure runtime
+ ClWorkloadRuntime runtime;
+ runtime.configure(sketch);
+
+ // (Important) Allocate auxiliary tensor memory if there are any
+ for(auto &data : runtime.get_auxiliary_tensors())
+ {
+ CLTensor *tensor = std::get<0>(data);
+ TensorInfo info = std::get<1>(data);
+ AuxMemoryInfo aux_mem_req = std::get<2>(data);
+ tensor->allocator()->init(info, aux_mem_req.alignment);
+ tensor->allocator()->allocate(); // Use ACL-allocated memory
+ }
+
+ // Construct user tensors
+ TensorType t_lhs{};
+ TensorType t_rhs{};
+ TensorType t_rhs_fuse{};
+ TensorType t_dst{};
+
+ // Initialize user tensors
+ t_lhs.allocator()->init(lhs_info);
+ t_rhs.allocator()->init(rhs_info);
+ t_dst.allocator()->init(dst_info);
+ if(_fuse)
+ {
+ t_rhs_fuse.allocator()->init(rhs_info_fuse);
+ }
+
+ // Allocate and fill user tensors
+ // Instead of using the ACL allocator, the user can choose to import memory into the tensors
+ t_lhs.allocator()->allocate();
+ t_rhs.allocator()->allocate();
+ t_dst.allocator()->allocate();
+ if(_fuse)
+ {
+ t_rhs_fuse.allocator()->allocate();
+ }
+
+ fill(AccessorType(t_lhs), 0);
+ fill(AccessorType(t_rhs), 1);
+ if(_fuse)
+ {
+ fill(AccessorType(t_rhs_fuse), 2);
+ }
+
+ // Run runtime
+ if(_fuse)
+ {
+ runtime.run({ &t_lhs, &t_rhs, &t_rhs_fuse, &t_dst });
+ }
+ else
+ {
+ runtime.run({ &t_lhs, &t_rhs, &t_dst });
+ }
+
+ return t_dst;
+ }
+
+ SimpleTensor<T> compute_reference(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2)
+ {
+ // Create reference
+ SimpleTensor<T> ref_lhs{ shape0, _data_type, 1, QuantizationInfo() };
+ SimpleTensor<T> ref_rhs{ shape1, _data_type, 1, QuantizationInfo() };
+ SimpleTensor<T> ref_rhs_fuse{ shape2, _data_type, 1, QuantizationInfo() };
+
+ // Fill reference
+ fill(ref_lhs, 0);
+ fill(ref_rhs, 1);
+ SimpleTensor<T> ref_dst = reference::pixel_wise_multiplication<T, T, T>(ref_lhs,
+ ref_rhs,
+ 1.f,
+ ConvertPolicy::SATURATE,
+ RoundingPolicy::TO_NEAREST_UP,
+ _data_type,
+ QuantizationInfo());
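+ // In the fused case the reference chains a second multiplication, matching
+ // the mul(mul(lhs, rhs), rhs_fuse) computed by the target.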
+ if(_fuse)
+ {
+ fill(ref_rhs_fuse, 2);
+ SimpleTensor<T> ref_dst_fuse = reference::pixel_wise_multiplication<T, T, T>(ref_dst,
+ ref_rhs_fuse,
+ 1.f,
+ ConvertPolicy::SATURATE,
+ RoundingPolicy::TO_NEAREST_UP,
+ _data_type,
+ QuantizationInfo());
+ return ref_dst_fuse;
+ }
+ return ref_dst;
+ }
+
+ TensorType _target{};
+ SimpleTensor<T> _reference{};
+ DataType _data_type{};
+ bool _is_inplace{ false };
+ bool _fuse{ false };
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionMulOneOpValidationFixture : public DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ template <typename...>
+ void setup(const TensorShape &shape0, DataType data_type, bool is_inplace)
+ {
+ DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape0, shape0, TensorShape(), data_type, is_inplace);
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionMulBroadcastValidationFixture : public DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ template <typename...>
+ void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type, bool is_inplace)
+ {
+ DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape0, shape1, TensorShape(), data_type, is_inplace);
+ }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionMulTwoOpsValidationFixture : public DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+ template <typename...>
+ void setup(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2, DataType data_type, bool is_inplace, bool fuse_two_ops)
+ {
+ DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape0, shape1, shape2, data_type, is_inplace, fuse_two_ops);
+ }
+};
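+
+/* Illustrative usage (not part of this patch): a test would typically
+ * instantiate one of the fixtures above through the framework macros, along
+ * the lines of
+ *
+ *   FIXTURE_DATA_TEST_CASE(RunSmall,
+ *                          DynamicFusionMulOneOpValidationFixture<CLTensor, CLAccessor, GpuMul, float>,
+ *                          framework::DatasetMode::ALL,
+ *                          combine(combine(datasets::SmallShapes(),
+ *                                          framework::dataset::make("DataType", { DataType::F32 })),
+ *                                  framework::dataset::make("InPlace", { false })))
+ *   {
+ *       validate(CLAccessor(_target), _reference);
+ *   }
+ *
+ * The actual datasets and tolerances live in tests/validation/dynamic_fusion/gpu/cl/Mul.cpp.
+ */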
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_MULFIXTURE */
diff --git a/utils/TypePrinter.h b/utils/TypePrinter.h
index 61f0eb9cec..448f184432 100644
--- a/utils/TypePrinter.h
+++ b/utils/TypePrinter.h
@@ -421,8 +421,7 @@ inline ::std::ostream &operator<<(::std::ostream &os, const GEMMLHSMatrixInfo &g
*/
inline ::std::ostream &operator<<(::std::ostream &os, const GEMMRHSMatrixInfo &gemm_info)
{
- os << "( n0=" << (unsigned int)gemm_info.n0 << " k0=" << gemm_info.k0 << " h0=" << gemm_info.h0 << " trans=" << gemm_info.transpose << " inter=" << gemm_info.interleave << " exp_img=" <<
- gemm_info.export_to_cl_image << "})";
+ os << "( n0=" << (unsigned int)gemm_info.n0 << " k0=" << gemm_info.k0 << " h0=" << gemm_info.h0 << " trans=" << gemm_info.transpose << " inter=" << gemm_info.interleave << " exp_img=" << gemm_info.export_to_cl_image << "})";
return os;
}
@@ -475,8 +474,7 @@ inline std::string to_string(const GEMMKernelInfo &gemm_info)
inline ::std::ostream &operator<<(::std::ostream &os, const BoundingBoxTransformInfo &bbox_info)
{
auto weights = bbox_info.weights();
- os << "(" << bbox_info.img_width() << "x" << bbox_info.img_height() << ")~" << bbox_info.scale() << "(weights={" << weights[0] << ", " << weights[1] << ", " << weights[2] << ", " << weights[3] <<
- "})";
+ os << "(" << bbox_info.img_width() << "x" << bbox_info.img_height() << ")~" << bbox_info.scale() << "(weights={" << weights[0] << ", " << weights[1] << ", " << weights[2] << ", " << weights[3] << "})";
return os;
}
@@ -3305,46 +3303,46 @@ inline std::string to_string(const Conv3dInfo &conv3d_info)
inline std::string to_string(const WeightFormat wf)
{
#define __CASE_WEIGHT_FORMAT(wf) \
-case WeightFormat::wf: \
- return #wf;
+ case WeightFormat::wf: \
+ return #wf;
switch(wf)
{
- __CASE_WEIGHT_FORMAT(UNSPECIFIED)
- __CASE_WEIGHT_FORMAT(ANY)
- __CASE_WEIGHT_FORMAT(OHWI)
- __CASE_WEIGHT_FORMAT(OHWIo2)
- __CASE_WEIGHT_FORMAT(OHWIo4)
- __CASE_WEIGHT_FORMAT(OHWIo8)
- __CASE_WEIGHT_FORMAT(OHWIo16)
- __CASE_WEIGHT_FORMAT(OHWIo32)
- __CASE_WEIGHT_FORMAT(OHWIo64)
- __CASE_WEIGHT_FORMAT(OHWIo128)
- __CASE_WEIGHT_FORMAT(OHWIo4i2)
- __CASE_WEIGHT_FORMAT(OHWIo4i2_bf16)
- __CASE_WEIGHT_FORMAT(OHWIo8i2)
- __CASE_WEIGHT_FORMAT(OHWIo8i2_bf16)
- __CASE_WEIGHT_FORMAT(OHWIo16i2)
- __CASE_WEIGHT_FORMAT(OHWIo16i2_bf16)
- __CASE_WEIGHT_FORMAT(OHWIo32i2)
- __CASE_WEIGHT_FORMAT(OHWIo32i2_bf16)
- __CASE_WEIGHT_FORMAT(OHWIo64i2)
- __CASE_WEIGHT_FORMAT(OHWIo64i2_bf16)
- __CASE_WEIGHT_FORMAT(OHWIo4i4)
- __CASE_WEIGHT_FORMAT(OHWIo4i4_bf16)
- __CASE_WEIGHT_FORMAT(OHWIo8i4)
- __CASE_WEIGHT_FORMAT(OHWIo8i4_bf16)
- __CASE_WEIGHT_FORMAT(OHWIo16i4)
- __CASE_WEIGHT_FORMAT(OHWIo16i4_bf16)
- __CASE_WEIGHT_FORMAT(OHWIo32i4)
- __CASE_WEIGHT_FORMAT(OHWIo32i4_bf16)
- __CASE_WEIGHT_FORMAT(OHWIo64i4)
- __CASE_WEIGHT_FORMAT(OHWIo64i4_bf16)
- __CASE_WEIGHT_FORMAT(OHWIo2i8)
- __CASE_WEIGHT_FORMAT(OHWIo4i8)
- __CASE_WEIGHT_FORMAT(OHWIo8i8)
- __CASE_WEIGHT_FORMAT(OHWIo16i8)
- __CASE_WEIGHT_FORMAT(OHWIo32i8)
- __CASE_WEIGHT_FORMAT(OHWIo64i8)
+ __CASE_WEIGHT_FORMAT(UNSPECIFIED)
+ __CASE_WEIGHT_FORMAT(ANY)
+ __CASE_WEIGHT_FORMAT(OHWI)
+ __CASE_WEIGHT_FORMAT(OHWIo2)
+ __CASE_WEIGHT_FORMAT(OHWIo4)
+ __CASE_WEIGHT_FORMAT(OHWIo8)
+ __CASE_WEIGHT_FORMAT(OHWIo16)
+ __CASE_WEIGHT_FORMAT(OHWIo32)
+ __CASE_WEIGHT_FORMAT(OHWIo64)
+ __CASE_WEIGHT_FORMAT(OHWIo128)
+ __CASE_WEIGHT_FORMAT(OHWIo4i2)
+ __CASE_WEIGHT_FORMAT(OHWIo4i2_bf16)
+ __CASE_WEIGHT_FORMAT(OHWIo8i2)
+ __CASE_WEIGHT_FORMAT(OHWIo8i2_bf16)
+ __CASE_WEIGHT_FORMAT(OHWIo16i2)
+ __CASE_WEIGHT_FORMAT(OHWIo16i2_bf16)
+ __CASE_WEIGHT_FORMAT(OHWIo32i2)
+ __CASE_WEIGHT_FORMAT(OHWIo32i2_bf16)
+ __CASE_WEIGHT_FORMAT(OHWIo64i2)
+ __CASE_WEIGHT_FORMAT(OHWIo64i2_bf16)
+ __CASE_WEIGHT_FORMAT(OHWIo4i4)
+ __CASE_WEIGHT_FORMAT(OHWIo4i4_bf16)
+ __CASE_WEIGHT_FORMAT(OHWIo8i4)
+ __CASE_WEIGHT_FORMAT(OHWIo8i4_bf16)
+ __CASE_WEIGHT_FORMAT(OHWIo16i4)
+ __CASE_WEIGHT_FORMAT(OHWIo16i4_bf16)
+ __CASE_WEIGHT_FORMAT(OHWIo32i4)
+ __CASE_WEIGHT_FORMAT(OHWIo32i4_bf16)
+ __CASE_WEIGHT_FORMAT(OHWIo64i4)
+ __CASE_WEIGHT_FORMAT(OHWIo64i4_bf16)
+ __CASE_WEIGHT_FORMAT(OHWIo2i8)
+ __CASE_WEIGHT_FORMAT(OHWIo4i8)
+ __CASE_WEIGHT_FORMAT(OHWIo8i8)
+ __CASE_WEIGHT_FORMAT(OHWIo16i8)
+ __CASE_WEIGHT_FORMAT(OHWIo32i8)
+ __CASE_WEIGHT_FORMAT(OHWIo64i8)
default:
return "invalid value";
}
@@ -3629,7 +3627,7 @@ inline std::string to_string(const experimental::dynamic_fusion::ResizeAttribute
*/
inline ::std::ostream &operator<<(::std::ostream &os, const experimental::dynamic_fusion::SoftmaxAttributes &softmax_attr)
{
- os << "SofmtaxAttributes="
+ os << "SoftmaxAttributes="
<< "["
<< "Beta=" << softmax_attr.beta() << ", "
<< "Is Log Softmax=" << softmax_attr.is_log_softmax() << ", "