5 files changed, 118 insertions, 31 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp
index 9a218b3e75..2611d6d575 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp
@@ -37,7 +37,8 @@ namespace
 {
 std::set<ElementwiseBinaryCommonAttributes::ElementwiseOp> supported_ops
 {
-    ElementwiseBinaryCommonAttributes::ElementwiseOp::ADD
+    ElementwiseBinaryCommonAttributes::ElementwiseOp::Add,
+    ElementwiseBinaryCommonAttributes::ElementwiseOp::Mul
 };
 }
 
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp
index a02160cba8..e7ee1c10df 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp
@@ -22,8 +22,8 @@
  * SOFTWARE.
  */
 #include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
 
-#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
 #include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h"
 
 namespace arm_compute
@@ -36,9 +36,13 @@ Status GpuAdd::validate_op(const GpuWorkloadSketch &sketch,
                            const ITensorInfo       *lhs,
                            const ITensorInfo       *rhs)
 {
-    // Set the elementwise operation to ADD then call the elementwise common validate_op
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8, DataType::S16, DataType::S32);
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type");
+
+    // Set the elementwise operation to Add then call the elementwise common validate_op
     ElementwiseBinaryCommonAttributes common_attributes{};
-    common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::ADD);
+    common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::Add);
     return GpuElementwiseBinaryCommon::validate_op(sketch, lhs, rhs, common_attributes);
 }
 
@@ -46,9 +50,13 @@ Status GpuAdd::is_supported_op(const GpuWorkloadContext &context,
                                const ITensorInfo        *lhs,
                                const ITensorInfo        *rhs)
 {
-    // Set the elementwise operation to ADD then call the elementwise common is_supported_op
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8, DataType::S16, DataType::S32);
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type");
+
+    // Set the elementwise operation to Add then call the elementwise common is_supported_op
     ElementwiseBinaryCommonAttributes common_attributes{};
-    common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::ADD);
+    common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::Add);
     return GpuElementwiseBinaryCommon::is_supported_op(context, lhs, rhs, common_attributes);
 }
 
@@ -57,9 +65,9 @@ ITensorInfo *GpuAdd::create_op(GpuWorkloadSketch &sketch,
                                ITensorInfo       *rhs)
 {
     // No need to log or validate as they'll be handled inside GpuElementwiseBinaryCommon::create_op()
-    // Set the elementwise operation to ADD then call the elementwise common create_op
+    // Set the elementwise operation to Add then call the elementwise common create_op
     ElementwiseBinaryCommonAttributes common_attributes{};
-    common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::ADD);
+    common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::Add);
     return GpuElementwiseBinaryCommon::create_op(sketch, lhs, rhs, common_attributes);
 }
 
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp
new file mode 100644
index 0000000000..464a32cbad
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp
@@ -0,0 +1,75 @@
+/*
+ * Copyright (c) 2023 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMul.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+
+#include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+Status GpuMul::validate_op(const GpuWorkloadSketch &sketch,
+                           const ITensorInfo       *lhs,
+                           const ITensorInfo       *rhs)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type");
+
+    // Set the elementwise operation to Mul then call the elementwise common validate_op
+    ElementwiseBinaryCommonAttributes common_attributes{};
+    common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::Mul);
+    return GpuElementwiseBinaryCommon::validate_op(sketch, lhs, rhs, common_attributes);
+}
+
+Status GpuMul::is_supported_op(const GpuWorkloadContext &context,
+                               const ITensorInfo        *lhs,
+                               const ITensorInfo        *rhs)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32);
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type");
+
+    // Set the elementwise operation to Mul then call the elementwise common is_supported_op
+    ElementwiseBinaryCommonAttributes common_attributes{};
+    common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::Mul);
+    return GpuElementwiseBinaryCommon::is_supported_op(context, lhs, rhs, common_attributes);
+}
+
+ITensorInfo *GpuMul::create_op(GpuWorkloadSketch &sketch,
+                               ITensorInfo       *lhs,
+                               ITensorInfo       *rhs)
+{
+    // No need to log or validate here, as both are handled inside GpuElementwiseBinaryCommon::create_op(); set the elementwise operation to Mul, then call the common create_op
+    ElementwiseBinaryCommonAttributes common_attributes{};
+    common_attributes.operation(ElementwiseBinaryCommonAttributes::ElementwiseOp::Mul);
+    return GpuElementwiseBinaryCommon::create_op(sketch, lhs, rhs, common_attributes);
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h b/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h
index cbefa379e6..0b58b6eb96 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h
+++ b/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h
@@ -25,11 +25,12 @@
 #define SRC_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_INTERNAL_GPUELEMENTWISEBINARYCOMMON
 
 #include "arm_compute/core/Error.h"
-#include "arm_compute/core/ITensorInfo.h"
 
 namespace arm_compute
 {
+/** Forward declaration */
 class ITensorInfo;
+
 namespace experimental
 {
 namespace dynamic_fusion
@@ -39,14 +40,15 @@ class ElementwiseBinaryCommonAttributes
 public:
     enum class ElementwiseOp
     {
-        ADD,          /**< (x + y) */
-        SUB,          /**< (x - y) */
-        DIV,          /**< (x / y) */
-        MIN,          /**< Min(x, y) */
-        MAX,          /**< Max(x, y) */
-        SQUARED_DIFF, /**< (x - y)^2 */
-        POWER,        /**< x ^ y */
-        PRELU,        /**< y*x if x < 0, x otherwise */
+        Add,         /**< (x + y) */
+        Sub,         /**< (x - y) */
+        Div,         /**< (x / y) */
+        Mul,         /**< (x * y) */
+        Min,         /**< Min(x, y) */
+        Max,         /**< Max(x, y) */
+        SquaredDiff, /**< (x - y)^2 */
+        Power,       /**< x ^ y */
+        Prelu,       /**< y*x if x < 0, x otherwise */
     };
     /** Set operation*/
     ElementwiseBinaryCommonAttributes &operation(const ElementwiseBinaryCommonAttributes::ElementwiseOp &operation);
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp
index 01017ed909..0dd7ca5e78 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp
@@ -68,7 +68,7 @@ std::string ClTemplateElementwiseBinary::get_component_code(const ComponentGroup
 
     code =
 R"_(
-    //------------------ START KERNEL {{meta_kernel_id}} ELTWISE_OP ---------------------
+    //------------------ START KERNEL {{meta_kernel_id}} {{ELTWISE_OP}} ---------------------
 )_";
 
     if(is_root)
@@ -139,7 +139,7 @@ R"_(
     code +=
 R"_(
     }
-    //------------------ END KERNEL {{meta_kernel_id}} ELTWISE_OP ---------------------
+    //------------------ END KERNEL {{meta_kernel_id}} {{ELTWISE_OP}} ---------------------
 )_";
 
     return code;
@@ -168,33 +168,34 @@ void ClTemplateElementwiseBinary::declare_variables(GpuKernelVariableTable &vtab
 
 TagLUT ClTemplateElementwiseBinary::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
 {
-    TagLUT             lut{};
+    TagLUT lut{};
 
     // Local build options
     lut["meta_kernel_id"] = id();
     lut["DATA_TYPE"]      = get_cl_type_from_data_type(_lhs->data_type());
     // Arguments and global shared variables
 
-    lut["lhs"] = vtable.get_variable(_lhs);
-    lut["rhs"] = vtable.get_variable(_rhs);
-    lut["dst"] = vtable.get_variable(_dst);
+    lut["lhs"]     = vtable.get_variable(_lhs);
+    lut["rhs"]     = vtable.get_variable(_rhs);
+    lut["dst"]     = vtable.get_variable(_dst);
     lut["arg_dst"] = vtable.get_variable(comp_group.get_any_dst_tensor());
 
     switch(_attributes.operation())
     {
-        case Attributes::ElementwiseOp::ADD:
+        case Attributes::ElementwiseOp::Add:
             lut["ELTWISE_OP"] = "ADD";
             break;
+        case Attributes::ElementwiseOp::Mul:
+            lut["ELTWISE_OP"] = "MUL";
+            break;
         default:
             ARM_COMPUTE_ERROR("Arithmetic Operation not supported");
     }
 
     ARM_COMPUTE_ERROR_ON(
-        comp_group.is_intermediate_tensor(_lhs) &&
-        detail::have_different_dimensions(_lhs->tensor_shape(), _dst->tensor_shape(), 0));
+        comp_group.is_intermediate_tensor(_lhs) && detail::have_different_dimensions(_lhs->tensor_shape(), _dst->tensor_shape(), 0));
     ARM_COMPUTE_ERROR_ON(
-        comp_group.is_intermediate_tensor(_rhs) &&
-        detail::have_different_dimensions(_rhs->tensor_shape(), _dst->tensor_shape(), 0));
+        comp_group.is_intermediate_tensor(_rhs) && detail::have_different_dimensions(_rhs->tensor_shape(), _dst->tensor_shape(), 0));
 
     // Set broadcast parameters
     // PRE: All tensors are broadcast-compatible
@@ -222,9 +223,9 @@ TagLUT ClTemplateElementwiseBinary::get_tag_lut(const GpuKernelVariableTable &vt
     lut["rhs_m0"]          = (rhs_broadcast_yz) ? "1" : "M0";
     lut["rhs_start_ind_1"] = (rhs_broadcast_yz) ? "0" : "g_ind_1";
 
-    lut["BROADCAST_OP"]    = (lhs_broadcast_yz) ? "BROADCAST_LHS_X_" :
-                             (rhs_broadcast_yz) ? "BROADCAST_RHS_X_" :
-                                                  "";
+    lut["BROADCAST_OP"] = (lhs_broadcast_yz) ? "BROADCAST_LHS_X_" :
+                          (rhs_broadcast_yz) ? "BROADCAST_RHS_X_" :
+                                               "";
 
     return lut;
 }