-rw-r--r-- | Android.bp | 1
-rw-r--r-- | arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h | 4
-rw-r--r-- | arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMul.h | 95
-rw-r--r-- | filelist.json | 3
-rw-r--r-- | src/core/CL/cl_kernels/tile_helpers.h | 5
-rw-r--r-- | src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp | 3
-rw-r--r-- | src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp | 22
-rw-r--r-- | src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp | 75
-rw-r--r-- | src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h | 20
-rw-r--r-- | src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp | 29
-rw-r--r-- | tests/validation/dynamic_fusion/gpu/cl/Add.cpp | 157
-rw-r--r-- | tests/validation/dynamic_fusion/gpu/cl/Mul.cpp | 223
-rw-r--r-- | tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h | 41
-rw-r--r-- | tests/validation/fixtures/dynamic_fusion/operators/MulFixture.h | 235
-rw-r--r-- | utils/TypePrinter.h | 84
15 files changed, 843 insertions, 154 deletions
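
Before the per-file diffs, here is a minimal usage sketch of the GpuMul operator added by this patch, modelled on the MulFixture.h test fixture further down. It is illustrative only and not part of the patch: the function name fused_mul_example and the tensor shape are placeholders, and the auxiliary-tensor allocation step performed by the fixture is reduced to a comment.

// Illustrative sketch (not part of the patch): fuse the new GpuMul operator
// into a workload sketch and run it, following the flow used in MulFixture.h.
#include "arm_compute/core/CL/CLKernelLibrary.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMul.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h"
#include "arm_compute/runtime/CL/CLTensor.h"

using namespace arm_compute;
using namespace arm_compute::experimental::dynamic_fusion;

void fused_mul_example()
{
    // Create a workload sketch bound to a CL compile context
    auto              cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
    auto              gpu_ctx        = GpuWorkloadContext{ &cl_compile_ctx };
    GpuWorkloadSketch sketch{ &gpu_ctx };

    // Tensor infos are created through the sketch; GpuMul supports F16/F32 only
    TensorInfo lhs_info = sketch.create_tensor_info(TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32));
    TensorInfo rhs_info = sketch.create_tensor_info(TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32));
    TensorInfo dst_info = sketch.create_tensor_info();

    // Fuse the elementwise multiplication, then bind its result to an output tensor
    ITensorInfo *ans_info = GpuMul::create_op(sketch, &lhs_info, &rhs_info);
    GpuOutput::create_op(sketch, ans_info, &dst_info);

    // Compile the sketch into a runnable workload
    // (if the workload needs auxiliary tensors, allocate them here via
    //  runtime.get_auxiliary_tensors(), as the fixture below does)
    ClWorkloadRuntime runtime;
    runtime.configure(sketch);

    // Construct, initialise and allocate the user tensors
    CLTensor lhs{}, rhs{}, dst{};
    lhs.allocator()->init(lhs_info);
    rhs.allocator()->init(rhs_info);
    dst.allocator()->init(dst_info);
    lhs.allocator()->allocate();
    rhs.allocator()->allocate();
    dst.allocator()->allocate();

    // ... fill lhs and rhs with data, then execute the fused workload
    runtime.run({ &lhs, &rhs, &dst });
}
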
diff --git a/Android.bp b/Android.bp index 34f722f6fe..78aa64a5f9 100644 --- a/Android.bp +++ b/Android.bp @@ -619,6 +619,7 @@ cc_library_static { "src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp", "src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp", "src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp", "src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp", "src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp", "src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp", diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h index 6ac5d4e500..796fd6f83a 100644 --- a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h +++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h @@ -25,10 +25,12 @@ #define ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUADD #include "arm_compute/core/Error.h" -#include "arm_compute/core/ITensorInfo.h" namespace arm_compute { +/** Forward declaration */ +class ITensorInfo; + namespace experimental { namespace dynamic_fusion diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMul.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMul.h new file mode 100644 index 0000000000..3e0ebdd96c --- /dev/null +++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMul.h @@ -0,0 +1,95 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ +#ifndef ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUMUL +#define ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUMUL + +#include "arm_compute/core/Error.h" + +namespace arm_compute +{ +/** Forward declaration */ +class ITensorInfo; + +namespace experimental +{ +namespace dynamic_fusion +{ +/** Forward declaration */ +class GpuWorkloadContext; +class GpuWorkloadSketch; + +/** Operator interface. */ +class GpuMul final +{ +public: + /** Create an operator and fuse it into the workload sketch. + * @note If @ref validate_op() fails, the creation also fails and may throw an error. + * @note If @ref validate_op() fails, @p sketch remains unchanged and valid. 
+ * + * Valid data type configurations: + * |lhs |rhs |dst | + * |:--------------|:--------------|:-------------| + * |F16 |F16 |F16 | + * |F32 |F32 |F32 | + * + * Valid data layouts: + * - Any + * + * @param[in,out] sketch Workload sketch into which the operator will be fused + * @param[in] lhs Left hand side tensor info. Data types supported: F16/F32. + * @param[in] rhs Right hand side tensor info. Data types supported: Same as @p lhs. + * + * @return Pointer for the destination tensor info + */ + static ITensorInfo *create_op(GpuWorkloadSketch &sketch, + ITensorInfo *lhs, + ITensorInfo *rhs); + + /** Check if the operator configuration is supported, irrespective of fusion + * + * @param[in] context Workload context within which the operator is running + * @param[in] lhs Left hand side tensor info. Data types supported: F16/F32. + * @param[in] rhs Right hand side tensor info. Data types supported: Same as @p lhs. + * + * @return Status + */ + static Status is_supported_op(const GpuWorkloadContext &context, + const ITensorInfo *lhs, + const ITensorInfo *rhs); + + /** Validate the operator and check if the configuration is supported and if it can be fused into the workload sketch. + * + * Parameters are similar to @ref GpuMul::create_op() + * + * @return Status + */ + static Status validate_op(const GpuWorkloadSketch &sketch, + const ITensorInfo *rhs, + const ITensorInfo *lhs); +}; + +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute +#endif /* ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUMUL */ diff --git a/filelist.json b/filelist.json index 3cb3a7a76f..a66d2a3384 100644 --- a/filelist.json +++ b/filelist.json @@ -2234,13 +2234,14 @@ "src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp", "src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp", "src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp", + "src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp", "src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp", "src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp", "src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp", "src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp", "src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp", "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp", - "src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp", "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp", "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp", "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp", diff --git a/src/core/CL/cl_kernels/tile_helpers.h b/src/core/CL/cl_kernels/tile_helpers.h index 507e172dfb..baee0d561b 100644 --- a/src/core/CL/cl_kernels/tile_helpers.h +++ b/src/core/CL/cl_kernels/tile_helpers.h @@ -1063,6 +1063,7 @@ #define V_ADD(A_VAL, B_VAL) ((A_VAL) + (B_VAL)) #define V_DIV(A_VAL, B_VAL) ((A_VAL) / (B_VAL)) +#define V_MUL(A_VAL, B_VAL) ((A_VAL) * (B_VAL)) /** Element-wise activation for quantized types * @@ -1130,6 +1131,9 @@ #define T_ELTWISE_BROADCAST_DIV_X(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE_BROADCAST_X(V_DIV, DST_DATA_TYPE, M0, N0, lhs, rhs, dst) +#define T_ELTWISE_BROADCAST_LHS_X_MUL(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE_BROADCAST_LHS_X(V_MUL, DST_DATA_TYPE, M0, N0, lhs, rhs, dst) +#define T_ELTWISE_BROADCAST_RHS_X_MUL(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE_BROADCAST_X(V_MUL, DST_DATA_TYPE, M0, N0, lhs, rhs, dst) + 
/** Element-wise scale with a constant value * * @note Performs: LHS * constant = DST @@ -1193,6 +1197,7 @@ #define T_ELTWISE_ADD(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE(V_ADD, DST_DATA_TYPE, M0, N0, lhs, rhs, dst) #define T_ELTWISE_DIV(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE(V_DIV, DST_DATA_TYPE, M0, N0, lhs, rhs, dst) +#define T_ELTWISE_MUL(DST_DATA_TYPE, M0, N0, lhs, rhs, dst) T_ELTWISE(V_MUL, DST_DATA_TYPE, M0, N0, lhs, rhs, dst) /** Element-wise operation between two tiles (LHS and RHS) * diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp index 9a218b3e75..2611d6d575 100644 --- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp +++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp @@ -37,7 +37,8 @@ namespace { std::set<ElementwiseBinaryCommonAttributes::ElementwiseOp> supported_ops { - ElementwiseBinaryCommonAttributes::ElementwiseOp::ADD + ElementwiseBinaryCommonAttributes::ElementwiseOp::Add, + ElementwiseBinaryCommonAttributes::ElementwiseOp::Mul }; } diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp index a02160cba8..e7ee1c10df 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp @@ -22,8 +22,8 @@ * SOFTWARE. */ #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" -#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h" #include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h" namespace arm_compute @@ -36,9 +36,13 @@ Status GpuAdd::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *lhs, const ITensorInfo *rhs) { - // Set the elementwise operation to ADD then call the elementwise common validate_op + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8, DataType::S16, DataType::S32); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type"); + + // Set the elementwise operation to Add then call the elementwise common validate_op ElementwiseBinaryCommonAttributes common_attributes{}; - common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::ADD); + common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::Add); return GpuElementwiseBinaryCommon::validate_op(sketch, lhs, rhs, common_attributes); } @@ -46,9 +50,13 @@ Status GpuAdd::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *lhs, const ITensorInfo *rhs) { - // Set the elementwise operation to ADD then call the elementwise common is_supported_op + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8, DataType::S16, DataType::S32); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type"); + + // Set the elementwise operation to Add then call the elementwise common is_supported_op ElementwiseBinaryCommonAttributes common_attributes{}; - common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::ADD); + common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::Add); 
return GpuElementwiseBinaryCommon::is_supported_op(context, lhs, rhs, common_attributes); } @@ -57,9 +65,9 @@ ITensorInfo *GpuAdd::create_op(GpuWorkloadSketch &sketch, ITensorInfo *rhs) { // No need to log or validate as they'll be handled inside GpuElementwiseBinaryCommon::create_op() - // Set the elementwise operation to ADD then call the elementwise common create_op + // Set the elementwise operation to Add then call the elementwise common create_op ElementwiseBinaryCommonAttributes common_attributes{}; - common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::ADD); + common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::Add); return GpuElementwiseBinaryCommon::create_op(sketch, lhs, rhs, common_attributes); } diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp new file mode 100644 index 0000000000..464a32cbad --- /dev/null +++ b/src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp @@ -0,0 +1,75 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. 
+ */ +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMul.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" + +#include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h" + +namespace arm_compute +{ +namespace experimental +{ +namespace dynamic_fusion +{ +Status GpuMul::validate_op(const GpuWorkloadSketch &sketch, + const ITensorInfo *lhs, + const ITensorInfo *rhs) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type"); + + // Set the elementwise operation to Mul then call the elementwise common validate_op + ElementwiseBinaryCommonAttributes common_attributes{}; + common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::Mul); + return GpuElementwiseBinaryCommon::validate_op(sketch, lhs, rhs, common_attributes); +} + +Status GpuMul::is_supported_op(const GpuWorkloadContext &context, + const ITensorInfo *lhs, + const ITensorInfo *rhs) +{ + ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs); + ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32); + ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type"); + + // Set the elementwise operation to Mul then call the elementwise common is_supported_op + ElementwiseBinaryCommonAttributes common_attributes{}; + common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::Mul); + return GpuElementwiseBinaryCommon::is_supported_op(context, lhs, rhs, common_attributes); +} + +ITensorInfo *GpuMul::create_op(GpuWorkloadSketch &sketch, + ITensorInfo *lhs, + ITensorInfo *rhs) +{ + // Set the elementwise operation to Mul then call the elementwise common create_op + ElementwiseBinaryCommonAttributes common_attributes{}; + common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::Mul); + return GpuElementwiseBinaryCommon::create_op(sketch, lhs, rhs, common_attributes); +} + +} // namespace dynamic_fusion +} // namespace experimental +} // namespace arm_compute diff --git a/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h b/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h index cbefa379e6..0b58b6eb96 100644 --- a/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h +++ b/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h @@ -25,11 +25,12 @@ #define SRC_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_INTERNAL_GPUELEMENTWISEBINARYCOMMON #include "arm_compute/core/Error.h" -#include "arm_compute/core/ITensorInfo.h" namespace arm_compute { +/** Forward declaration */ class ITensorInfo; + namespace experimental { namespace dynamic_fusion @@ -39,14 +40,15 @@ class ElementwiseBinaryCommonAttributes public: enum class ElementwiseOp { - ADD, /**< (x + y) */ - SUB, /**< (x - y) */ - DIV, /**< (x / y) */ - MIN, /**< Min(x, y) */ - MAX, /**< Max(x, y) */ - SQUARED_DIFF, /**< (x - y)^2 */ - POWER, /**< x ^ y */ - PRELU, /**< y*x if x < 0, x otherwise */ + Add, /**< (x + y) */ + Sub, /**< (x - y) */ + Div, /**< (x / y) */ + Mul, /**< (x * y) */ + Min, /**< Min(x, y) */ + Max, /**< Max(x, y) */ + SquaredDiff, /**< (x - y)^2 */ + Power, /**< x ^ y */ + Prelu, /**< y*x if x < 0, x otherwise */ }; /** Set operation*/ ElementwiseBinaryCommonAttributes &operation(const 
ElementwiseBinaryCommonAttributes::ElementwiseOp &operation); diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp index 01017ed909..0dd7ca5e78 100644 --- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp +++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp @@ -68,7 +68,7 @@ std::string ClTemplateElementwiseBinary::get_component_code(const ComponentGroup code = R"_( - //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_OP --------------------- + //------------------ START KERNEL {{meta_kernel_id}} {{ELTWISE_OP}} --------------------- )_"; if(is_root) @@ -139,7 +139,7 @@ R"_( code += R"_( } - //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_OP --------------------- + //------------------ END KERNEL {{meta_kernel_id}} {{ELTWISE_OP}} --------------------- )_"; return code; @@ -168,33 +168,34 @@ void ClTemplateElementwiseBinary::declare_variables(GpuKernelVariableTable &vtab TagLUT ClTemplateElementwiseBinary::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const { - TagLUT lut{}; + TagLUT lut{}; // Local build options lut["meta_kernel_id"] = id(); lut["DATA_TYPE"] = get_cl_type_from_data_type(_lhs->data_type()); // Arguments and global shared variables - lut["lhs"] = vtable.get_variable(_lhs); - lut["rhs"] = vtable.get_variable(_rhs); - lut["dst"] = vtable.get_variable(_dst); + lut["lhs"] = vtable.get_variable(_lhs); + lut["rhs"] = vtable.get_variable(_rhs); + lut["dst"] = vtable.get_variable(_dst); lut["arg_dst"] = vtable.get_variable(comp_group.get_any_dst_tensor()); switch(_attributes.operation()) { - case Attributes::ElementwiseOp::ADD: + case Attributes::ElementwiseOp::Add: lut["ELTWISE_OP"] = "ADD"; break; + case Attributes::ElementwiseOp::Mul: + lut["ELTWISE_OP"] = "MUL"; + break; default: ARM_COMPUTE_ERROR("Arithmetic Operation not supported"); } ARM_COMPUTE_ERROR_ON( - comp_group.is_intermediate_tensor(_lhs) && - detail::have_different_dimensions(_lhs->tensor_shape(), _dst->tensor_shape(), 0)); + comp_group.is_intermediate_tensor(_lhs) && detail::have_different_dimensions(_lhs->tensor_shape(), _dst->tensor_shape(), 0)); ARM_COMPUTE_ERROR_ON( - comp_group.is_intermediate_tensor(_rhs) && - detail::have_different_dimensions(_rhs->tensor_shape(), _dst->tensor_shape(), 0)); + comp_group.is_intermediate_tensor(_rhs) && detail::have_different_dimensions(_rhs->tensor_shape(), _dst->tensor_shape(), 0)); // Set broadcast parameters // PRE: All tensors are broadcast-compatible @@ -222,9 +223,9 @@ TagLUT ClTemplateElementwiseBinary::get_tag_lut(const GpuKernelVariableTable &vt lut["rhs_m0"] = (rhs_broadcast_yz) ? "1" : "M0"; lut["rhs_start_ind_1"] = (rhs_broadcast_yz) ? "0" : "g_ind_1"; - lut["BROADCAST_OP"] = (lhs_broadcast_yz) ? "BROADCAST_LHS_X_" : - (rhs_broadcast_yz) ? "BROADCAST_RHS_X_" : - ""; + lut["BROADCAST_OP"] = (lhs_broadcast_yz) ? "BROADCAST_LHS_X_" : + (rhs_broadcast_yz) ? 
"BROADCAST_RHS_X_" : + ""; return lut; } diff --git a/tests/validation/dynamic_fusion/gpu/cl/Add.cpp b/tests/validation/dynamic_fusion/gpu/cl/Add.cpp index 0385407ad2..52ba0520ad 100644 --- a/tests/validation/dynamic_fusion/gpu/cl/Add.cpp +++ b/tests/validation/dynamic_fusion/gpu/cl/Add.cpp @@ -34,7 +34,6 @@ #include "tests/datasets/DynamicFusionDataset.h" #include "tests/datasets/ShapeDatasets.h" #include "tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h" -#include "tests/validation/reference/ElementwiseOperations.h" namespace arm_compute { @@ -42,6 +41,14 @@ namespace test { namespace validation { +/* Synced with tests/validation/CL/ArithmeticAddition.cpp from the standard interface. + * + * Difference | Why the difference + * No quantized tests | Not supported yet + * No in place tests | Not supported yet + * No activation tests | Not needed in dynamic fusion interface + * + */ TEST_SUITE(CL) TEST_SUITE(DYNAMIC_FUSION) TEST_SUITE(ADD) @@ -49,29 +56,33 @@ TEST_SUITE(ADD) // *INDENT-OFF* // clang-format off DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip( - framework::dataset::make("Input1Info", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + framework::dataset::make("LhsInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Invalid data type combination TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), // S16 is valid data type for Add TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32), // S32 is valid data type for Add TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting allowed for lhs + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), // Unsupported data type QASYMM8 + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED), // Unsupported data type QASYMM8 TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(15U, 23U, 3U), 1, DataType::F32), // Broadcast Y dimension is not allowed TensorInfo(TensorShape( 3U, 8U, 9U), 1, DataType::S16), // Broadcast Z dimension is not allowed TensorInfo(TensorShape(32U, 13U, 2U, 2), 1, DataType::F32), // Batching is allowed }), - framework::dataset::make("Input2Info",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + framework::dataset::make("RhsInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32), TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32), TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), // Unsupported data type QASYMM8 + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED), // Unsupported data type QASYMM8 TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting allowed for rhs TensorInfo(TensorShape(15U, 1U, 3U), 1, DataType::F32), TensorInfo(TensorShape( 3U, 8U, 1U), 1, DataType::S16), TensorInfo(TensorShape(32U, 13U, 2U, 2), 1, DataType::F32), })), - framework::dataset::make("Expected", { true, false, true, true, false, true, true, false, false, true})), + framework::dataset::make("Expected", { true, false, true, true, false, true, false, false, true, false, false, true})), input1_info, input2_info, expected) { // Create a new workload sketch @@ -79,7 +90,7 @@ 
DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip( auto gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx }; GpuWorkloadSketch sketch{ &gpu_ctx }; - // Fuse Elementwise Add + // Validate Elementwise Add auto lhs_info = sketch.create_tensor_info(input1_info); auto rhs_info = sketch.create_tensor_info(input2_info); @@ -89,59 +100,73 @@ DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip( // clang-format on // *INDENT-ON* -RelativeTolerance<float> tolerance_f32(0.01f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ -RelativeTolerance<half_float::half> tolerance_f16(half_float::half(0.1)); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ -constexpr float tolerance_num = 0.01f; /**< Tolerance number */ +constexpr AbsoluteTolerance<float> tolerance_f16(0.0001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ +constexpr AbsoluteTolerance<float> tolerance_f32(0.0001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ +constexpr float tolerance_num = 0.0001f; /**< Tolerance number */ template <typename T> -using DynamicFusionAddOpFixture = DynamicFusionGpuElementwiseBinaryOneOpValidationFixture<CLTensor, CLAccessor, GpuAdd, T>; +using DynamicFusionCLAddFixture = DynamicFusionGpuElementwiseBinaryOneOpValidationFixture<CLTensor, CLAccessor, GpuAdd, T>; template <typename T> -using DynamicFusionAddOpBroadcastFixture = DynamicFusionGpuElementwiseBinaryBroadcastOneOpValidationFixture<CLTensor, CLAccessor, GpuAdd, T>; +using DynamicFusionCLAddBroadcastFixture = DynamicFusionGpuElementwiseBinaryBroadcastOneOpValidationFixture<CLTensor, CLAccessor, GpuAdd, T>; template <typename T> -using DynamicFusionGpuFuseTwoAddOpsFixture = DynamicFusionGpuElementwiseBinaryTwoOpsValidationFixture<CLTensor, CLAccessor, GpuAdd, T>; +using DynamicFusionCLAddTwoOpsFixture = DynamicFusionGpuElementwiseBinaryTwoOpsValidationFixture<CLTensor, CLAccessor, GpuAdd, T>; TEST_SUITE(FP32) -FIXTURE_DATA_TEST_CASE(RunSmallOneOp, DynamicFusionAddOpFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine( - framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), - datasets::SmallShapesNoBatches()), - framework::dataset::make("DataType", { DataType::F32 })), - framework::dataset::make("InPlace", { false, true }))) +FIXTURE_DATA_TEST_CASE(RunSmallOneOp, + DynamicFusionCLAddFixture<float>, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), + datasets::SmallShapes()), + framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("InPlace", { false }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLargeOneOp, DynamicFusionAddOpFixture<float>, framework::DatasetMode::NIGHTLY, combine(combine(combine( - framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), - datasets::LargeShapesNoBatches()), - framework::dataset::make("DataType", { DataType::F32 })), - framework::dataset::make("InPlace", { false, true }))) +FIXTURE_DATA_TEST_CASE(RunLargeOneOp, + DynamicFusionCLAddFixture<float>, + framework::DatasetMode::NIGHTLY, + combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), + datasets::LargeShapes()), + 
framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("InPlace", { false }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunSmallBroadcastOneOp, DynamicFusionAddOpBroadcastFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), - datasets::TemporaryLimitedSmallShapesBroadcast()), - framework::dataset::make("DataType", { DataType::F32 })), - framework::dataset::make("InPlace", { false, true }))) +FIXTURE_DATA_TEST_CASE(RunSmallBroadcastOneOp, + DynamicFusionCLAddBroadcastFixture<float>, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), + datasets::TemporaryLimitedSmallShapesBroadcast()), + framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("InPlace", { false }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunLargeBroadcastOneOp, DynamicFusionAddOpBroadcastFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), - datasets::TemporaryLimitedLargeShapesBroadcast()), - framework::dataset::make("DataType", { DataType::F32 })), - framework::dataset::make("InPlace", { false, true }))) +FIXTURE_DATA_TEST_CASE(RunLargeBroadcastOneOp, + DynamicFusionCLAddBroadcastFixture<float>, + framework::DatasetMode::NIGHTLY, + combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), + datasets::TemporaryLimitedLargeShapesBroadcast()), + framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("InPlace", { false }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); } -FIXTURE_DATA_TEST_CASE(RunSmallTwoOps, DynamicFusionGpuFuseTwoAddOpsFixture<float>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), - datasets::DynamicFusionElementwiseBinaryTwoOpsSmallShapes()), - framework::dataset::make("DataType", { DataType::F32 })), - framework::dataset::make("InPlace", { false }))) +FIXTURE_DATA_TEST_CASE(RunSmallTwoOps, + DynamicFusionCLAddTwoOpsFixture<float>, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), + datasets::DynamicFusionElementwiseBinaryTwoOpsSmallShapes()), + framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("InPlace", { false })), + framework::dataset::make("FuseTwoOps", { true }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32); @@ -149,19 +174,25 @@ FIXTURE_DATA_TEST_CASE(RunSmallTwoOps, DynamicFusionGpuFuseTwoAddOpsFixture<floa TEST_SUITE_END() // FP32 TEST_SUITE(FP16) -FIXTURE_DATA_TEST_CASE(RunSmallOneOp, DynamicFusionAddOpFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), - datasets::SmallShapesNoBatches()), - framework::dataset::make("DataType", { DataType::F16 })), - framework::dataset::make("InPlace", { false, true }))) +FIXTURE_DATA_TEST_CASE(RunSmallOneOp, + DynamicFusionCLAddFixture<half>, + framework::DatasetMode::ALL, + combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), + 
datasets::SmallShapes()), + framework::dataset::make("DataType", { DataType::F16 })), + framework::dataset::make("InPlace", { false }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32, tolerance_num); } -FIXTURE_DATA_TEST_CASE(RunSmallBroadcastOneOp, DynamicFusionAddOpBroadcastFixture<half>, framework::DatasetMode::ALL, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), - datasets::TemporaryLimitedSmallShapesBroadcast()), - framework::dataset::make("DataType", { DataType::F16 })), - framework::dataset::make("InPlace", { false }))) +FIXTURE_DATA_TEST_CASE(RunSmallBroadcastOneOp, + DynamicFusionCLAddBroadcastFixture<half>, + framework::DatasetMode::ALL, + combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), + datasets::TemporaryLimitedSmallShapesBroadcast()), + framework::dataset::make("DataType", { DataType::F16 })), + framework::dataset::make("InPlace", { false }))) { // Validate output validate(CLAccessor(_target), _reference, tolerance_f32, tolerance_num); @@ -170,10 +201,13 @@ FIXTURE_DATA_TEST_CASE(RunSmallBroadcastOneOp, DynamicFusionAddOpBroadcastFixtur TEST_SUITE_END() // FP16 TEST_SUITE(S32) -FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionAddOpFixture<int32_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), - datasets::SmallShapesNoBatches()), - framework::dataset::make("DataType", { DataType::S32 })), - framework::dataset::make("InPlace", { false }))) +FIXTURE_DATA_TEST_CASE(RunSmall, + DynamicFusionCLAddFixture<int32_t>, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), + datasets::SmallShapes()), + framework::dataset::make("DataType", { DataType::S32 })), + framework::dataset::make("InPlace", { false }))) { // Validate output validate(CLAccessor(_target), _reference); @@ -181,18 +215,24 @@ FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionAddOpFixture<int32_t>, framework:: TEST_SUITE_END() // S32 TEST_SUITE(S16) -FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionAddOpFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), - datasets::SmallShapesNoBatches()), - framework::dataset::make("DataType", { DataType::S16 })), - framework::dataset::make("InPlace", { false }))) +FIXTURE_DATA_TEST_CASE(RunSmall, + DynamicFusionCLAddFixture<int16_t>, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), + datasets::SmallShapes()), + framework::dataset::make("DataType", { DataType::S16 })), + framework::dataset::make("InPlace", { false }))) { // Validate output validate(CLAccessor(_target), _reference); } -FIXTURE_DATA_TEST_CASE(RunLarge, DynamicFusionAddOpFixture<int16_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), - datasets::LargeShapesNoBatches()), - framework::dataset::make("DataType", { DataType::S16 })), - framework::dataset::make("InPlace", { false }))) +FIXTURE_DATA_TEST_CASE(RunLarge, + DynamicFusionCLAddFixture<int16_t>, + framework::DatasetMode::NIGHTLY, + combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), + datasets::LargeShapes()), + framework::dataset::make("DataType", { DataType::S16 })), + 
framework::dataset::make("InPlace", { false }))) { // Validate output validate(CLAccessor(_target), _reference); @@ -200,10 +240,13 @@ FIXTURE_DATA_TEST_CASE(RunLarge, DynamicFusionAddOpFixture<int16_t>, framework:: TEST_SUITE_END() // S16 TEST_SUITE(U8) -FIXTURE_DATA_TEST_CASE(RunSmall, DynamicFusionAddOpFixture<uint8_t>, framework::DatasetMode::PRECOMMIT, combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), - datasets::SmallShapesNoBatches()), - framework::dataset::make("DataType", { DataType::U8 })), - framework::dataset::make("InPlace", { false }))) +FIXTURE_DATA_TEST_CASE(RunSmall, + DynamicFusionCLAddFixture<uint8_t>, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(framework::dataset::make("ElementwiseOp", { ArithmeticOperation::ADD }), + datasets::SmallShapes()), + framework::dataset::make("DataType", { DataType::U8 })), + framework::dataset::make("InPlace", { false }))) { // Validate output validate(CLAccessor(_target), _reference); diff --git a/tests/validation/dynamic_fusion/gpu/cl/Mul.cpp b/tests/validation/dynamic_fusion/gpu/cl/Mul.cpp new file mode 100644 index 0000000000..a9e8f9c15f --- /dev/null +++ b/tests/validation/dynamic_fusion/gpu/cl/Mul.cpp @@ -0,0 +1,223 @@ +/* + * Copyright (c) 2023 Arm Limited. + * + * SPDX-License-Identifier: MIT + * + * Permission is hereby granted, free of charge, to any person obtaining a copy + * of this software and associated documentation files (the "Software"), to + * deal in the Software without restriction, including without limitation the + * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or + * sell copies of the Software, and to permit persons to whom the Software is + * furnished to do so, subject to the following conditions: + * + * The above copyright notice and this permission notice shall be included in all + * copies or substantial portions of the Software. + * + * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR + * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, + * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE + * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER + * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, + * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE + * SOFTWARE. + */ + +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMul.h" + +#include "tests/CL/CLAccessor.h" +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/framework/datasets/Datasets.h" +#include "tests/validation/Validation.h" + +#include "tests/datasets/DynamicFusionDataset.h" +#include "tests/datasets/ShapeDatasets.h" +#include "tests/validation/fixtures/dynamic_fusion/operators/MulFixture.h" + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +/* Synced with tests/validation/CL/PixelwiseMultiplication.cpp from the standard interface. 
+ * + * Difference | Why the difference + * No integer tests | Not supported yet + * No quantized tests | Not supported yet + * No convert policy tests | Not needed as convert policy is ignored by floating types + * No scale tests | Not supported yet + * No rounding modes tests | Not supported yet + * No in place tests | Not supported yet + * No activation tests | Not needed in dynamic fusion interface + * + */ +namespace +{ +constexpr AbsoluteTolerance<float> tolerance_f16(0.0001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F16 */ +constexpr AbsoluteTolerance<float> tolerance_f32(0.0001f); /**< Tolerance value for comparing reference's output against implementation's output for DataType::F32 */ +} // namespace +TEST_SUITE(CL) +TEST_SUITE(DYNAMIC_FUSION) +TEST_SUITE(MUL) + +// *INDENT-OFF* +// clang-format off +DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip( + framework::dataset::make("LhsInfo", { TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Invalid data type combination + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), // Unsupported data type U8 + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S8), // Unsupported data type S8 + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), // Unsupported data type S16 + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32), // Unsupported data type S32 + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), // Unsupported data type QASYMM8 + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED), // Unsupported data type QASYMM8_SIGNED + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), // Mismatching shapes + TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting allowed for lhs + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(15U, 23U, 3U), 1, DataType::F32), // Broadcast Y dimension is not allowed + TensorInfo(TensorShape( 3U, 8U, 9U), 1, DataType::F32), // Broadcast Z dimension is not allowed + TensorInfo(TensorShape(32U, 13U, 2U, 2), 1, DataType::F32), // Batching is allowed + }), + framework::dataset::make("RhsInfo",{ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::U8), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S8), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S16), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::S32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::QASYMM8_SIGNED), + TensorInfo(TensorShape(48U, 11U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 1U, 1U), 1, DataType::F32), // Broadcasting allowed for rhs + TensorInfo(TensorShape(15U, 1U, 3U), 1, DataType::F32), + TensorInfo(TensorShape( 3U, 8U, 1U), 1, DataType::F32), + TensorInfo(TensorShape(32U, 13U, 2U, 2), 1, DataType::F32), + })), + framework::dataset::make("Expected", { true, true, false, false, false, false, false, false, false, false, true, true, false, false, true })), + input1_info, input2_info, expected) +{ + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto gpu_ctx = 
GpuWorkloadContext{ &cl_compile_ctx }; + GpuWorkloadSketch sketch{ &gpu_ctx }; + + // Validate Elementwise Mul + auto lhs_info = sketch.create_tensor_info(input1_info); + auto rhs_info = sketch.create_tensor_info(input2_info); + + bool res = bool(GpuMul::validate_op(sketch, &lhs_info, &rhs_info)); + ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS); +} +// clang-format on +// *INDENT-ON* + +template <typename T> +using DynamicFusionCLMulFixture = DynamicFusionMulOneOpValidationFixture<CLTensor, CLAccessor, GpuMul, T>; +template <typename T> +using DynamicFusionCLMulBroadcastFixture = DynamicFusionMulBroadcastValidationFixture<CLTensor, CLAccessor, GpuMul, T>; +template <typename T> +using DynamicFusionCLMulTwoOpsFixture = DynamicFusionMulTwoOpsValidationFixture<CLTensor, CLAccessor, GpuMul, T>; + +TEST_SUITE(F16) +FIXTURE_DATA_TEST_CASE(RunSmallOneOp, + DynamicFusionCLMulFixture<half>, + framework::DatasetMode::ALL, + combine(combine(datasets::SmallShapes(), + framework::dataset::make("DataType", { DataType::F16 })), + framework::dataset::make("InPlace", { false }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16); +} + +FIXTURE_DATA_TEST_CASE(RunSmallBroadcastOneOp, + DynamicFusionCLMulBroadcastFixture<half>, + framework::DatasetMode::PRECOMMIT, + combine(combine(datasets::TemporaryLimitedSmallShapesBroadcast(), + framework::dataset::make("DataType", { DataType::F16 })), + framework::dataset::make("InPlace", { false }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16); +} + +FIXTURE_DATA_TEST_CASE(RunLargeBroadcastOneOp, + DynamicFusionCLMulBroadcastFixture<half>, + framework::DatasetMode::NIGHTLY, + combine(combine(datasets::TemporaryLimitedLargeShapesBroadcast(), + framework::dataset::make("DataType", { DataType::F16 })), + framework::dataset::make("InPlace", { false }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f16); +} +TEST_SUITE_END() // F16 + +TEST_SUITE(F32) +FIXTURE_DATA_TEST_CASE(RunSmallOneOp, + DynamicFusionCLMulFixture<float>, + framework::DatasetMode::PRECOMMIT, + combine(combine(datasets::SmallShapes(), + framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("InPlace", { false }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunLargeOneOp, + DynamicFusionCLMulFixture<float>, + framework::DatasetMode::NIGHTLY, + combine(combine(datasets::LargeShapes(), + framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("InPlace", { false }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunSmallBroadcastOneOp, + DynamicFusionCLMulBroadcastFixture<float>, + framework::DatasetMode::PRECOMMIT, + combine(combine(datasets::TemporaryLimitedSmallShapesBroadcast(), + framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("InPlace", { false }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + +FIXTURE_DATA_TEST_CASE(RunLargeBroadcastOneOp, + DynamicFusionCLMulBroadcastFixture<float>, + framework::DatasetMode::NIGHTLY, + combine(combine(datasets::TemporaryLimitedLargeShapesBroadcast(), + framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("InPlace", { false }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} + 
+FIXTURE_DATA_TEST_CASE(RunSmallTwoOps, + DynamicFusionCLMulTwoOpsFixture<float>, + framework::DatasetMode::PRECOMMIT, + combine(combine(combine(datasets::DynamicFusionElementwiseBinaryTwoOpsSmallShapes(), + framework::dataset::make("DataType", { DataType::F32 })), + framework::dataset::make("InPlace", { false })), + framework::dataset::make("FuseTwoOps", { true }))) +{ + // Validate output + validate(CLAccessor(_target), _reference, tolerance_f32); +} +TEST_SUITE_END() // F32 + +TEST_SUITE_END() // MUL +TEST_SUITE_END() // DYNAMIC_FUSION +TEST_SUITE_END() // CL +} // namespace validation +} // namespace test +} // namespace arm_compute diff --git a/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h b/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h index faed610874..b0680c0e4a 100644 --- a/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h +++ b/tests/validation/fixtures/dynamic_fusion/gpu/cl/ElementwiseBinaryFixture.h @@ -31,12 +31,9 @@ #include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" -#include "tests/CL/CLAccessor.h" #include "tests/framework/Fixture.h" #include "tests/framework/Macros.h" -#include "tests/validation/Validation.h" #include "tests/validation/reference/ElementwiseOperations.h" -#include "tests/validation/reference/Permute.h" using namespace arm_compute::experimental::dynamic_fusion; @@ -51,12 +48,13 @@ class DynamicFusionGpuElementwiseBinaryValidationGenericFixture : public framewo { public: template <typename...> - void setup(ArithmeticOperation op, TensorShape shape0, TensorShape shape1, TensorShape shape2, const DataType data_type, const bool is_inplace) + void setup(ArithmeticOperation ref_op, const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2, DataType data_type, bool is_inplace, bool fuse_two_ops = false) { - _op = op; + _ref_op = ref_op; _is_inplace = is_inplace; _data_type = data_type; - _fuse = shape2.total_size() != 0; + _fuse = fuse_two_ops; + ARM_COMPUTE_ERROR_ON_MSG(_fuse && shape2.total_size() == 0, "No shape2 provided for fusion of two ops."); ARM_COMPUTE_ERROR_ON_MSG(_fuse && _is_inplace, "In place for fusing case not supported yet."); _target = compute_target(shape0, shape1, shape2); _reference = compute_reference(shape0, shape1, shape2); @@ -68,7 +66,7 @@ protected: { if(is_data_type_float(tensor.data_type())) { - switch(_op) + switch(_ref_op) { case ArithmeticOperation::DIV: library->fill_tensor_uniform_ranged(tensor, i, { std::pair<float, float>(-0.001f, 0.001f) }); @@ -82,7 +80,7 @@ protected: } else if(tensor.data_type() == DataType::S32) { - switch(_op) + switch(_ref_op) { case ArithmeticOperation::DIV: library->fill_tensor_uniform_ranged(tensor, i, { std::pair<int32_t, int32_t>(-1U, 1U) }); @@ -97,7 +95,7 @@ protected: } } - TensorType compute_target(TensorShape shape0, TensorShape shape1, TensorShape shape2) + TensorType compute_target(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2) { // Create a new workload sketch auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); @@ -105,7 +103,7 @@ protected: GpuWorkloadSketch sketch{ &gpu_ctx }; // Fuse first element wise binary Op - TensorInfo lhs_info = sketch.create_tensor_info(shape0, 1, _data_type); + TensorInfo lhs_info = sketch.create_tensor_info(TensorInfo(shape0, 1, _data_type)); TensorInfo rhs_info = sketch.create_tensor_info(TensorInfo(shape1, 1, _data_type)); 
TensorInfo dst_info = sketch.create_tensor_info(); @@ -115,7 +113,7 @@ protected: if(_fuse) { - rhs_info_fuse = sketch.create_tensor_info(shape2, 1, _data_type); + rhs_info_fuse = sketch.create_tensor_info(TensorInfo(shape2, 1, _data_type)); ITensorInfo *ans2_info = FunctionType::create_op(sketch, ans_info, &rhs_info_fuse); GpuOutput::create_op(sketch, ans2_info, &dst_info); } @@ -183,7 +181,7 @@ protected: return t_dst; } - SimpleTensor<T> compute_reference(TensorShape shape0, TensorShape shape1, TensorShape shape2) + SimpleTensor<T> compute_reference(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2) { const TensorShape out_shape = TensorShape::broadcast_shape(shape0, shape1); const TensorShape out_shape_fuse = TensorShape::broadcast_shape(out_shape, shape1); @@ -194,21 +192,22 @@ protected: SimpleTensor<T> ref_rhs_fuse{ shape2, _data_type, 1, QuantizationInfo() }; SimpleTensor<T> ref_dst{ out_shape, _data_type, 1, QuantizationInfo() }; SimpleTensor<T> ref_dst_fuse{ out_shape_fuse, _data_type, 1, QuantizationInfo() }; + // Fill reference fill(ref_lhs, 0); fill(ref_rhs, 1); - reference::arithmetic_operation<T>(_op, ref_lhs, ref_rhs, ref_dst, ConvertPolicy::WRAP); + reference::arithmetic_operation<T>(_ref_op, ref_lhs, ref_rhs, ref_dst, ConvertPolicy::WRAP); if(_fuse) { fill(ref_rhs_fuse, 2); - reference::arithmetic_operation<T>(_op, ref_dst, ref_rhs_fuse, ref_dst_fuse, ConvertPolicy::WRAP); + reference::arithmetic_operation<T>(_ref_op, ref_dst, ref_rhs_fuse, ref_dst_fuse, ConvertPolicy::WRAP); } SimpleTensor<T> *ret = _fuse ? &ref_dst_fuse : &ref_dst; return *ret; } - ArithmeticOperation _op{ ArithmeticOperation::ADD }; + ArithmeticOperation _ref_op{ ArithmeticOperation::ADD }; TensorType _target{}; SimpleTensor<T> _reference{}; DataType _data_type{}; @@ -222,9 +221,9 @@ class DynamicFusionGpuElementwiseBinaryOneOpValidationFixture : public DynamicFu { public: template <typename...> - void setup(ArithmeticOperation op, TensorShape shape, const DataType data_type, const bool is_inplace) + void setup(ArithmeticOperation ref_op, const TensorShape &shape0, DataType data_type, bool is_inplace) { - DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(op, shape, shape, TensorShape(), data_type, is_inplace); + DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ref_op, shape0, shape0, TensorShape(), data_type, is_inplace); } }; @@ -233,9 +232,9 @@ class DynamicFusionGpuElementwiseBinaryBroadcastOneOpValidationFixture : public { public: template <typename...> - void setup(ArithmeticOperation op, TensorShape shape0, TensorShape shape1, const DataType data_type, const bool is_inplace) + void setup(ArithmeticOperation ref_op, const TensorShape &shape0, const TensorShape &shape1, DataType data_type, bool is_inplace) { - DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(op, shape0, shape1, TensorShape(), data_type, is_inplace); + DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ref_op, shape0, shape1, TensorShape(), data_type, is_inplace); } }; @@ -244,9 +243,9 @@ class DynamicFusionGpuElementwiseBinaryTwoOpsValidationFixture : public DynamicF { public: template <typename...> - void setup(ArithmeticOperation op, TensorShape shape0, TensorShape shape1, TensorShape shape2, const DataType data_type, const bool is_inplace) + void 
setup(ArithmeticOperation ref_op, const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2, DataType data_type, bool is_inplace, bool fuse_two_ops) { - DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(op, shape0, shape1, shape2, data_type, is_inplace); + DynamicFusionGpuElementwiseBinaryValidationGenericFixture<TensorType, AccessorType, FunctionType, T>::setup(ref_op, shape0, shape1, shape2, data_type, is_inplace, fuse_two_ops); } }; diff --git a/tests/validation/fixtures/dynamic_fusion/operators/MulFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/MulFixture.h new file mode 100644 index 0000000000..0530707c38 --- /dev/null +++ b/tests/validation/fixtures/dynamic_fusion/operators/MulFixture.h @@ -0,0 +1,235 @@ +/* +* Copyright (c) 2023 Arm Limited. +* +* SPDX-License-Identifier: MIT +* +* Permission is hereby granted, free of charge, to any person obtaining a copy +* of this software and associated documentation files (the "Software"), to +* deal in the Software without restriction, including without limitation the +* rights to use, copy, modify, merge, publish, distribute, sublicense, and/or +* sell copies of the Software, and to permit persons to whom the Software is +* furnished to do so, subject to the following conditions: +* +* The above copyright notice and this permission notice shall be included in all +* copies or substantial portions of the Software. +* +* THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR +* IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, +* FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE +* AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER +* LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, +* OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE +* SOFTWARE. +*/ +#ifndef TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_MULFIXTURE +#define TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_MULFIXTURE + +#include "arm_compute/core/CL/CLKernelLibrary.h" +#include "arm_compute/core/TensorInfo.h" +#include "arm_compute/core/Types.h" +#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h" +#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuOutput.h" + +#include "tests/Globals.h" +#include "tests/framework/Fixture.h" +#include "tests/framework/Macros.h" +#include "tests/validation/reference/PixelWiseMultiplication.h" + +using namespace arm_compute::experimental::dynamic_fusion; + +namespace arm_compute +{ +namespace test +{ +namespace validation +{ +/* We use a separate test fixture for Multiplication op instead of reusing ElementwiseBinaryFixture to avoid exposing + * the internal enum ElementwiseOp to the public utils/TypePrinters.h as required by the data test case macros + * to print the test data. 
+ */ +template <typename TensorType, typename AccessorType, typename FunctionType, typename T> +class DynamicFusionMulValidationFixture : public framework::Fixture +{ +public: + template <typename...> + void setup(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2, DataType data_type, bool is_inplace, bool fuse_two_ops = false) + { + _data_type = data_type; + _is_inplace = is_inplace; + _fuse = fuse_two_ops; + ARM_COMPUTE_ERROR_ON_MSG(_fuse && shape2.total_size() == 0, "No shape2 provided for fusion of two ops."); + ARM_COMPUTE_ERROR_ON_MSG(_fuse && _is_inplace, "In place for fusing case not supported yet."); + _target = compute_target(shape0, shape1, shape2); + _reference = compute_reference(shape0, shape1, shape2); + } + +protected: + template <typename U> + void fill(U &&tensor, int i) + { + library->fill_tensor_uniform(tensor, i); + } + + TensorType compute_target(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2) + { + // Create a new workload sketch + auto cl_compile_ctx = CLKernelLibrary::get().get_compile_context(); + auto gpu_ctx = GpuWorkloadContext{ &cl_compile_ctx }; + GpuWorkloadSketch sketch{ &gpu_ctx }; + + // Fuse first multiplication op + TensorInfo lhs_info = sketch.create_tensor_info(TensorInfo(shape0, 1, _data_type)); + TensorInfo rhs_info = sketch.create_tensor_info(TensorInfo(shape1, 1, _data_type)); + TensorInfo dst_info = sketch.create_tensor_info(); + + TensorInfo rhs_info_fuse; + + ITensorInfo *ans_info = FunctionType::create_op(sketch, &lhs_info, &rhs_info); + + if(_fuse) + { + rhs_info_fuse = sketch.create_tensor_info(TensorInfo(shape2, 1, _data_type)); + ITensorInfo *ans2_info = FunctionType::create_op(sketch, ans_info, &rhs_info_fuse); + GpuOutput::create_op(sketch, ans2_info, &dst_info); + } + else + { + GpuOutput::create_op(sketch, ans_info, &dst_info); + } + + // Configure runtime + ClWorkloadRuntime runtime; + runtime.configure(sketch); + + // (Important) Allocate auxiliary tensor memory if there are any + for(auto &data : runtime.get_auxiliary_tensors()) + { + CLTensor *tensor = std::get<0>(data); + TensorInfo info = std::get<1>(data); + AuxMemoryInfo aux_mem_req = std::get<2>(data); + tensor->allocator()->init(info, aux_mem_req.alignment); + tensor->allocator()->allocate(); // Use ACL allocated memory + } + + // Construct user tensors + TensorType t_lhs{}; + TensorType t_rhs{}; + TensorType t_rhs_fuse{}; + TensorType t_dst{}; + + // Initialize user tensors + t_lhs.allocator()->init(lhs_info); + t_rhs.allocator()->init(rhs_info); + t_dst.allocator()->init(dst_info); + if(_fuse) + { + t_rhs_fuse.allocator()->init(rhs_info_fuse); + } + + // Allocate and fill user tensors + // Instead of using ACL allocator, the user can choose to import memory into the tensors + t_lhs.allocator()->allocate(); + t_rhs.allocator()->allocate(); + t_dst.allocator()->allocate(); + if(_fuse) + { + t_rhs_fuse.allocator()->allocate(); + } + + fill(AccessorType(t_lhs), 0); + fill(AccessorType(t_rhs), 1); + if(_fuse) + { + fill(AccessorType(t_rhs_fuse), 2); + } + + // Run runtime + if(_fuse) + { + runtime.run({ &t_lhs, &t_rhs, &t_rhs_fuse, &t_dst }); + } + else + { + runtime.run({ &t_lhs, &t_rhs, &t_dst }); + } + + return t_dst; + } + + SimpleTensor<T> compute_reference(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2) + { + // Create reference + SimpleTensor<T> ref_lhs{ shape0, _data_type, 1, QuantizationInfo() }; + SimpleTensor<T> ref_rhs{ shape1, _data_type, 1, QuantizationInfo() }; + 
+    SimpleTensor<T> compute_reference(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2)
+    {
+        // Create reference
+        SimpleTensor<T> ref_lhs{ shape0, _data_type, 1, QuantizationInfo() };
+        SimpleTensor<T> ref_rhs{ shape1, _data_type, 1, QuantizationInfo() };
+        SimpleTensor<T> ref_rhs_fuse{ shape2, _data_type, 1, QuantizationInfo() };
+
+        // Fill reference
+        fill(ref_lhs, 0);
+        fill(ref_rhs, 1);
+        SimpleTensor<T> ref_dst = reference::pixel_wise_multiplication<T, T, T>(ref_lhs,
+                                                                                ref_rhs,
+                                                                                1.f,
+                                                                                ConvertPolicy::SATURATE,
+                                                                                RoundingPolicy::TO_NEAREST_UP,
+                                                                                _data_type,
+                                                                                QuantizationInfo());
+        if(_fuse)
+        {
+            fill(ref_rhs_fuse, 2);
+            SimpleTensor<T> ref_dst_fuse = reference::pixel_wise_multiplication<T, T, T>(ref_dst,
+                                                                                         ref_rhs_fuse,
+                                                                                         1.f,
+                                                                                         ConvertPolicy::SATURATE,
+                                                                                         RoundingPolicy::TO_NEAREST_UP,
+                                                                                         _data_type,
+                                                                                         QuantizationInfo());
+            return ref_dst_fuse;
+        }
+        return ref_dst;
+    }
+
+    TensorType      _target{};
+    SimpleTensor<T> _reference{};
+    DataType        _data_type{};
+    bool            _is_inplace{ false };
+    bool            _fuse{ false };
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionMulOneOpValidationFixture : public DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(const TensorShape &shape0, DataType data_type, bool is_inplace)
+    {
+        DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape0, shape0, TensorShape(), data_type, is_inplace);
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionMulBroadcastValidationFixture : public DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(const TensorShape &shape0, const TensorShape &shape1, DataType data_type, bool is_inplace)
+    {
+        DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape0, shape1, TensorShape(), data_type, is_inplace);
+    }
+};
+
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionMulTwoOpsValidationFixture : public DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T>
+{
+public:
+    template <typename...>
+    void setup(const TensorShape &shape0, const TensorShape &shape1, const TensorShape &shape2, DataType data_type, bool is_inplace, bool fuse_two_ops)
+    {
+        DynamicFusionMulValidationFixture<TensorType, AccessorType, FunctionType, T>::setup(shape0, shape1, shape2, data_type, is_inplace, fuse_two_ops);
+    }
+};
+
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_MULFIXTURE */
diff --git a/utils/TypePrinter.h b/utils/TypePrinter.h
index 61f0eb9cec..448f184432 100644
--- a/utils/TypePrinter.h
+++ b/utils/TypePrinter.h
@@ -421,8 +421,7 @@ inline ::std::ostream &operator<<(::std::ostream &os, const GEMMLHSMatrixInfo &g
  */
 inline ::std::ostream &operator<<(::std::ostream &os, const GEMMRHSMatrixInfo &gemm_info)
 {
-    os << "( n0=" << (unsigned int)gemm_info.n0 << " k0=" << gemm_info.k0 << " h0=" << gemm_info.h0 << " trans=" << gemm_info.transpose << " inter=" << gemm_info.interleave << " exp_img=" <<
-    gemm_info.export_to_cl_image << "})";
+    os << "( n0=" << (unsigned int)gemm_info.n0 << " k0=" << gemm_info.k0 << " h0=" << gemm_info.h0 << " trans=" << gemm_info.transpose << " inter=" << gemm_info.interleave << " exp_img=" << gemm_info.export_to_cl_image << "})";
     return os;
 }
@@ -475,8 +474,7 @@ inline std::string to_string(const GEMMKernelInfo &gemm_info)
 inline ::std::ostream &operator<<(::std::ostream &os, const BoundingBoxTransformInfo &bbox_info)
 {
     auto weights = bbox_info.weights();
-    os << "(" <<
bbox_info.img_width() << "x" << bbox_info.img_height() << ")~" << bbox_info.scale() << "(weights={" << weights[0] << ", " << weights[1] << ", " << weights[2] << ", " << weights[3] << - "})"; + os << "(" << bbox_info.img_width() << "x" << bbox_info.img_height() << ")~" << bbox_info.scale() << "(weights={" << weights[0] << ", " << weights[1] << ", " << weights[2] << ", " << weights[3] << "})"; return os; } @@ -3305,46 +3303,46 @@ inline std::string to_string(const Conv3dInfo &conv3d_info) inline std::string to_string(const WeightFormat wf) { #define __CASE_WEIGHT_FORMAT(wf) \ -case WeightFormat::wf: \ - return #wf; + case WeightFormat::wf: \ + return #wf; switch(wf) { - __CASE_WEIGHT_FORMAT(UNSPECIFIED) - __CASE_WEIGHT_FORMAT(ANY) - __CASE_WEIGHT_FORMAT(OHWI) - __CASE_WEIGHT_FORMAT(OHWIo2) - __CASE_WEIGHT_FORMAT(OHWIo4) - __CASE_WEIGHT_FORMAT(OHWIo8) - __CASE_WEIGHT_FORMAT(OHWIo16) - __CASE_WEIGHT_FORMAT(OHWIo32) - __CASE_WEIGHT_FORMAT(OHWIo64) - __CASE_WEIGHT_FORMAT(OHWIo128) - __CASE_WEIGHT_FORMAT(OHWIo4i2) - __CASE_WEIGHT_FORMAT(OHWIo4i2_bf16) - __CASE_WEIGHT_FORMAT(OHWIo8i2) - __CASE_WEIGHT_FORMAT(OHWIo8i2_bf16) - __CASE_WEIGHT_FORMAT(OHWIo16i2) - __CASE_WEIGHT_FORMAT(OHWIo16i2_bf16) - __CASE_WEIGHT_FORMAT(OHWIo32i2) - __CASE_WEIGHT_FORMAT(OHWIo32i2_bf16) - __CASE_WEIGHT_FORMAT(OHWIo64i2) - __CASE_WEIGHT_FORMAT(OHWIo64i2_bf16) - __CASE_WEIGHT_FORMAT(OHWIo4i4) - __CASE_WEIGHT_FORMAT(OHWIo4i4_bf16) - __CASE_WEIGHT_FORMAT(OHWIo8i4) - __CASE_WEIGHT_FORMAT(OHWIo8i4_bf16) - __CASE_WEIGHT_FORMAT(OHWIo16i4) - __CASE_WEIGHT_FORMAT(OHWIo16i4_bf16) - __CASE_WEIGHT_FORMAT(OHWIo32i4) - __CASE_WEIGHT_FORMAT(OHWIo32i4_bf16) - __CASE_WEIGHT_FORMAT(OHWIo64i4) - __CASE_WEIGHT_FORMAT(OHWIo64i4_bf16) - __CASE_WEIGHT_FORMAT(OHWIo2i8) - __CASE_WEIGHT_FORMAT(OHWIo4i8) - __CASE_WEIGHT_FORMAT(OHWIo8i8) - __CASE_WEIGHT_FORMAT(OHWIo16i8) - __CASE_WEIGHT_FORMAT(OHWIo32i8) - __CASE_WEIGHT_FORMAT(OHWIo64i8) + __CASE_WEIGHT_FORMAT(UNSPECIFIED) + __CASE_WEIGHT_FORMAT(ANY) + __CASE_WEIGHT_FORMAT(OHWI) + __CASE_WEIGHT_FORMAT(OHWIo2) + __CASE_WEIGHT_FORMAT(OHWIo4) + __CASE_WEIGHT_FORMAT(OHWIo8) + __CASE_WEIGHT_FORMAT(OHWIo16) + __CASE_WEIGHT_FORMAT(OHWIo32) + __CASE_WEIGHT_FORMAT(OHWIo64) + __CASE_WEIGHT_FORMAT(OHWIo128) + __CASE_WEIGHT_FORMAT(OHWIo4i2) + __CASE_WEIGHT_FORMAT(OHWIo4i2_bf16) + __CASE_WEIGHT_FORMAT(OHWIo8i2) + __CASE_WEIGHT_FORMAT(OHWIo8i2_bf16) + __CASE_WEIGHT_FORMAT(OHWIo16i2) + __CASE_WEIGHT_FORMAT(OHWIo16i2_bf16) + __CASE_WEIGHT_FORMAT(OHWIo32i2) + __CASE_WEIGHT_FORMAT(OHWIo32i2_bf16) + __CASE_WEIGHT_FORMAT(OHWIo64i2) + __CASE_WEIGHT_FORMAT(OHWIo64i2_bf16) + __CASE_WEIGHT_FORMAT(OHWIo4i4) + __CASE_WEIGHT_FORMAT(OHWIo4i4_bf16) + __CASE_WEIGHT_FORMAT(OHWIo8i4) + __CASE_WEIGHT_FORMAT(OHWIo8i4_bf16) + __CASE_WEIGHT_FORMAT(OHWIo16i4) + __CASE_WEIGHT_FORMAT(OHWIo16i4_bf16) + __CASE_WEIGHT_FORMAT(OHWIo32i4) + __CASE_WEIGHT_FORMAT(OHWIo32i4_bf16) + __CASE_WEIGHT_FORMAT(OHWIo64i4) + __CASE_WEIGHT_FORMAT(OHWIo64i4_bf16) + __CASE_WEIGHT_FORMAT(OHWIo2i8) + __CASE_WEIGHT_FORMAT(OHWIo4i8) + __CASE_WEIGHT_FORMAT(OHWIo8i8) + __CASE_WEIGHT_FORMAT(OHWIo16i8) + __CASE_WEIGHT_FORMAT(OHWIo32i8) + __CASE_WEIGHT_FORMAT(OHWIo64i8) default: return "invalid value"; } @@ -3629,7 +3627,7 @@ inline std::string to_string(const experimental::dynamic_fusion::ResizeAttribute */ inline ::std::ostream &operator<<(::std::ostream &os, const experimental::dynamic_fusion::SoftmaxAttributes &softmax_attr) { - os << "SofmtaxAttributes=" + os << "SoftmaxAttributes=" << "[" << "Beta=" << softmax_attr.beta() << ", " << "Is Log Softmax=" << 
softmax_attr.is_log_softmax() << ", "