aboutsummaryrefslogtreecommitdiff
path: root/src
diff options
context:
space:
mode:
authorJakub Sujak <jakub.sujak@arm.com>2022-11-25 16:43:18 +0000
committerJakub Sujak <jakub.sujak@arm.com>2022-12-13 09:58:36 +0000
commit32741725ac6e6c9658c51ed8585d314a1707ee8c (patch)
treea32c508ade253975a892387f7d5eba5e3f02e650 /src
parent5945070e4d73962f6d1e0b9eff2ab7488ee5c787 (diff)
downloadComputeLibrary-32741725ac6e6c9658c51ed8585d314a1707ee8c.tar.gz
Add CLAMP operator to Dynamic Fusion interface
Add the CLAMP activation function for GPU backend with generic activation Component and TemplateWriter modules. CLAMP is internally implemented as LU_BOUNDED_RELU activation function with the alpha and beta variables swapped. We do NOT consider in-place computation cases in this patch. * CLAMP operator for GPU backend. * Activation Component and TemplateWriter for CL backend. * TemplateWriter generates tiled kernel code. * Supported data types: F16, F32. * Validation tests for CLAMP operation. Resolves: COMPMID-5519 Change-Id: Ieb097d6b1e6a7ed2b882518e88314454efb402f6 Signed-off-by: Jakub Sujak <jakub.sujak@arm.com> Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/8762 Comments-Addressed: Arm Jenkins <bsgcomp@arm.com> Reviewed-by: Gunes Bayir <gunes.bayir@arm.com> Reviewed-by: SiCong Li <sicong.li@arm.com> Tested-by: Arm Jenkins <bsgcomp@arm.com> Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'src')
-rw-r--r--src/dynamic_fusion/sketch/attributes/ClampAttributes.cpp58
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp79
-rw-r--r--src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h120
-rw-r--r--src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp158
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h2
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp188
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h118
-rw-r--r--src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h2
-rw-r--r--src/gpu/cl/ClKernelLibrary.cpp8
9 files changed, 732 insertions, 1 deletions
diff --git a/src/dynamic_fusion/sketch/attributes/ClampAttributes.cpp b/src/dynamic_fusion/sketch/attributes/ClampAttributes.cpp
new file mode 100644
index 0000000000..b177f760df
--- /dev/null
+++ b/src/dynamic_fusion/sketch/attributes/ClampAttributes.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/dynamic_fusion/sketch/attributes/ClampAttributes.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+
+ClampAttributes &ClampAttributes::min_val(const float &min_val) // Setter: stores the minimum clamp value
+{
+ _min_val = min_val;
+ return *this; // Return the object itself so that setter calls can be chained
+}
+
+float ClampAttributes::min_val() const // Getter: returns the stored minimum clamp value
+{
+ return _min_val;
+}
+
+ClampAttributes &ClampAttributes::max_val(const float &max_val) // Setter: stores the maximum clamp value
+{
+ _max_val = max_val;
+ return *this; // Return the object itself so that setter calls can be chained
+}
+
+float ClampAttributes::max_val() const // Getter: returns the stored maximum clamp value
+{
+ return _max_val;
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp
new file mode 100644
index 0000000000..6eaa45c25d
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "ClComponentActivation.h"
+
+#include "src/core/CL/CLValidate.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+
+Status ClComponentActivation::validate(const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes)
+{ // Checks only the ACL_SRC / ACL_DST tensor infos; returns an empty Status when every check passes
+ ARM_COMPUTE_UNUSED(properties, attributes); // Neither argument takes part in the checks below
+
+ const ITensorInfo *const src = tensors.get_const_tensor(TensorType::ACL_SRC);
+ const ITensorInfo *const dst = tensors.get_const_tensor(TensorType::ACL_DST);
+
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32); // Only single-channel F16 / F32 sources are accepted
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(src, dst); // dst has to agree with src in shape, data type and data layout
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, dst);
+
+ // Both tensor infos must be initialized, i.e. have a non-zero total size
+ ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().total_size() == 0);
+ ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0);
+
+ // Device requirements are met
+ ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
+ ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(dst);
+
+ return Status{};
+}
+
+ClComponentActivation::ClComponentActivation(ComponentId id,
+ const IGpuKernelComponent::Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes)
+ : IGpuKernelComponent{ id, properties, tensors },
+ _component_writer{ std::make_unique<ClTemplateActivation>(id, tensors, attributes) } // The component owns the template writer it later exposes via template_writer()
+{
+}
+
+const IGpuTemplateComponentWriter *ClComponentActivation::template_writer() const
+{
+ return _component_writer.get(); // Non-owning pointer; ownership stays with this component
+}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h
new file mode 100644
index 0000000000..d1b849ec73
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTACTIVATION
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTACTIVATION
+
+#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+
+namespace arm_compute
+{
+/** Forward declaration */
+class ITensorInfo;
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Forward declaration */
+template <typename T>
+class ArgumentPack;
+
+/** Forward declaration */
+class ClTemplateActivation;
+
+class ClComponentActivation final : public IGpuKernelComponent // Activation kernel component; its kernel code is produced by a ClTemplateActivation writer
+{
+public:
+ /** Attributes are a set of backend-agnostic parameters that define what a component does */
+ using Attributes = ActivationLayerInfo;
+
+ /** Validate the component
+ *
+ * @param[in] properties Component properties @ref Properties
+ * @param[in] tensors Tensor arguments to the component (taken by const reference, never modified)
+ * @param[in] attributes Component attributes @ref Attributes
+ *
+ * @return Status Validation results
+ *
+ * Tensor argument names:
+ * - ACL_SRC: Input
+ * - ACL_DST: Output
+ *
+ * Tensor argument constness:
+ * - ACL_SRC: Const
+ * - ACL_DST: Const
+ *
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |ACL_SRC |ACL_DST |
+ * |:--------------|:--------------|
+ * |F16 |F16 |
+ * |F32 |F32 |
+ */
+ static Status validate(
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes);
+
+ /** Constructor
+ *
+ * Takes the component @p id plus the same arguments as @ref ClComponentActivation::validate()
+ */
+ ClComponentActivation(
+ ComponentId id,
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes);
+
+ /** Destructor */
+ ~ClComponentActivation() override = default;
+
+ /** Prevent instances of this class from being copy constructed */
+ ClComponentActivation(const ClComponentActivation &component) = delete;
+
+ /** Prevent instances of this class from being copied */
+ ClComponentActivation &operator=(const ClComponentActivation &component) = delete;
+
+ /** Allow instances of this class to be move constructed */
+ ClComponentActivation(ClComponentActivation &&component) = default;
+
+ /** Allow instances of this class to be moved */
+ ClComponentActivation &operator=(ClComponentActivation &&component) = default;
+
+ /** Get template writer for the component */
+ const IGpuTemplateComponentWriter *template_writer() const override;
+
+ /** Get component type (always GpuComponentType::Simple for this component) */
+ GpuComponentType type() const override
+ {
+ return GpuComponentType::Simple;
+ }
+
+private:
+ std::unique_ptr<ClTemplateActivation> _component_writer; /**< Owned template writer for this component */
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTACTIVATION */
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp
new file mode 100644
index 0000000000..ffef6115d6
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuClamp.h"
+
+#include "arm_compute/core/experimental/Types.h"
+
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/dynamic_fusion/sketch/ArgumentPack.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h"
+
+#include "src/common/utils/Log.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+namespace
+{
+constexpr GpuOperatorType operator_type = GpuOperatorType::Simple; // Operator type handed to new_operator() by validate_op() and create_op()
+} // namespace
+
+Status GpuClamp::is_supported_op(const GpuWorkloadContext &context,
+ const ITensorInfo *src,
+ const ITensorInfo *dst,
+ const ClampAttributes &attributes)
+{ // Checks the tensors and clamp bounds, then validates the activation component used to implement CLAMP
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(attributes.max_val() < attributes.min_val(), "Maximum clamp value cannot be lower than minimum value");
+
+ // Auto initialize a local copy of the dst tensor info (dst itself is const and stays untouched)
+ TensorInfo dst_info_to_validate = *dst;
+ auto_init_if_empty(dst_info_to_validate, *src->clone());
+
+ // CLAMP operator is implemented as LU_BOUNDED_RELU with the alpha and beta variables swapped
+ const ClComponentActivation::Attributes act_info{ ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, attributes.max_val(), attributes.min_val() }; // max_val is passed first, min_val second
+
+ // Check components
+ if(context.gpu_language() == GpuLanguage::OpenCL)
+ {
+ // Validate Activation Component
+ const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+
+ ArgumentPack<ITensorInfo> arguments;
+ arguments.add_const_tensor(ACL_SRC, src);
+ arguments.add_const_tensor(ACL_DST, &dst_info_to_validate); // Validate against the auto-initialized copy, not the caller's dst
+ ARM_COMPUTE_RETURN_ON_ERROR(ClComponentActivation::validate(properties, arguments, act_info));
+ }
+ else
+ {
+ ARM_COMPUTE_RETURN_ERROR_MSG("Unimplemented Gpu language"); // Only the OpenCL branch above is implemented
+ }
+ return Status{};
+}
+
+Status GpuClamp::validate_op(const GpuWorkloadSketch &sketch,
+ const ITensorInfo *src,
+ const ITensorInfo *dst,
+ const ClampAttributes &attributes)
+{ // Runs the fusion test against the sketch's operator group, then defers to is_supported_op()
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+
+ // Check if tensors have valid id, i.e. they are created from a sketch
+ ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !dst->has_valid_id());
+
+ // Auto initialize a local copy of the dst tensor info (dst itself is const and stays untouched)
+ TensorInfo dst_info_to_validate = *dst;
+ auto_init_if_empty(dst_info_to_validate, *src->clone());
+
+ // Perform fusion test to check if the operator meets fusion constraints
+ ArgumentPack<ITensorInfo> tensors;
+ tensors.add_const_tensor(ACL_SRC, src);
+ tensors.add_const_tensor(ACL_DST, &dst_info_to_validate);
+ const auto op = sketch.implementation().operator_group().new_operator(operator_type, tensors);
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(!sketch.implementation().operator_group().try_add_operator(op),
+ "Operator fusion test failed. This operator cannot be fused into the workload");
+
+ // Check if configuration is supported
+ return is_supported_op(*sketch.gpu_context(), src, &dst_info_to_validate, attributes);
+}
+
+void GpuClamp::create_op(GpuWorkloadSketch &sketch,
+ ITensorInfo *src,
+ ITensorInfo *dst,
+ const ClampAttributes &attributes)
+{ // Adds the CLAMP operator to the sketch: one activation component plus one operator-group entry
+ // Assert validation
+ ARM_COMPUTE_ERROR_THROW_ON(GpuClamp::validate_op(sketch, src, dst, attributes));
+ ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_LOG_PARAMS(src, dst, attributes);
+
+ // Auto initialize dst tensor (here the caller's dst itself, not a copy as in the validation functions)
+ auto_init_if_empty(*dst, *src->clone());
+
+ // Translate into components and add to component graph
+ GpuKernelComponentGraph &comp_graph = sketch.implementation().component_graph();
+
+ // CLAMP operator is implemented as LU_BOUNDED_RELU with the alpha and beta variables swapped
+ const ClComponentActivation::Attributes act_info{ ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, attributes.max_val(), attributes.min_val() }; // max_val is passed first, min_val second
+
+ const auto *const sketch_ctx = sketch.implementation().context();
+
+ if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
+ {
+ // Add Activation Component
+ auto properties = IGpuKernelComponent::Properties();
+ properties.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+
+ ArgumentPack<ITensorInfo> arguments;
+ arguments.add_const_tensor(ACL_SRC, src);
+ arguments.add_const_tensor(ACL_DST, dst);
+ comp_graph.add_new_component<ClComponentActivation>(properties, arguments, act_info);
+ }
+ else
+ {
+ ARM_COMPUTE_ERROR("Unimplemented Gpu language"); // Only the OpenCL branch above is implemented
+ }
+
+ // Set up fusion test by adding to the Operator Group
+ // Note this has to be performed after all the components have been successfully added to the component graph
+
+ // Pack tensor infos
+ ArgumentPack<ITensorInfo> tensors;
+ tensors.add_const_tensor(ACL_SRC, src);
+ tensors.add_const_tensor(ACL_DST, dst);
+
+ const auto op = sketch.implementation().operator_group().new_operator(operator_type, tensors);
+ sketch.implementation().operator_group().add_operator(op);
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h b/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h
index 328e942955..4a1fb142d6 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h
@@ -57,7 +57,7 @@ public:
* @param[in] id Component id
* @param[in] tensors Tensor arguments to the components
*/
- IGpuTemplateComponentWriter(ComponentId id, const ArgumentPack<ITensorInfo> tensors)
+ IGpuTemplateComponentWriter(ComponentId id, const ArgumentPack<ITensorInfo> &tensors)
: _id{ id }, _tensors{ tensors }
{
}
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp
new file mode 100644
index 0000000000..c3128ea552
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "ClTemplateActivation.h"
+
+#include "arm_compute/core/Utils.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "support/StringSupport.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+
+ClTemplateActivation::ClTemplateActivation(ComponentId id,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes)
+ : IGpuTemplateComponentWriter{ id, tensors },
+ _src{},
+ _dst{},
+ _attributes{ attributes }
+{
+ _src = this->tensors().get_const_tensor(TensorType::ACL_SRC); // Cache the source tensor info for the other member functions
+ _dst = this->tensors().get_const_tensor(TensorType::ACL_DST); // Cache the destination tensor info for the other member functions
+ ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
+}
+
+std::string ClTemplateActivation::get_name() const
+{
+ return "activation"; // Fixed component name, independent of the configured activation function
+}
+
+std::string ClTemplateActivation::get_component_code(const ComponentGroup &comp_group) const
+{ // Returns the code template; its {{...}} tags are filled from the table built by get_tag_lut()
+ std::string code;
+ const bool is_root = (comp_group.get_root_component()->id() == this->id()); // True when this component is the root of its group
+
+ code = R"_(
+//------------------ START KERNEL {{meta_kernel_id}} ---------------------
+)_";
+ if(is_root) // Root: declare the dst tile, load src into it, activate it, and compute the dst indirect-y indices
+ {
+ code += R"_(
+// IN(src) {{src}}
+// OUT(dst, accum) {{dst}}
+
+TILE({{DATA_TYPE}}, M0, N0, {{dst}});
+TILE(uint, M0, 1, g_dst_indirect_y);
+{
+ {{src}}_offset_first_element_in_bytes += g_ind_2 * {{src}}_stride_z;
+
+ T_LOAD({{DATA_TYPE}}, M0, N0, {{TENSOR_TYPE}}, {{src}}, g_ind_0, g_ind_1, 1, {{src}}_stride_y, {{dst}});
+
+ T_ACTIVATION({{DATA_TYPE}}, M0, N0, {{ACT}}, {{A_VAL}}, {{B_VAL}}, {{dst}}, {{dst}});
+}
+
+LOOP_UNROLLING(int, i, 0, 1, M0,
+{
+ g_dst_indirect_y[i].v = (uint)min((int)(g_ind_1 + i), (int)({{arg_dst}}_w) - 1);
+ g_dst_indirect_y[i].v += (int)(g_ind_2 % {{arg_dst}}_h) * (int)({{arg_dst}}_w);
+ g_dst_indirect_y[i].v += (int)(g_ind_2 / {{arg_dst}}_h) * (int)({{arg_dst}}_w * {{arg_dst}}_h);
+})
+)_";
+ }
+ else // Non-root: apply the activation to the src tile, using it as both input and output
+ {
+ code += R"_(
+// IN/OUT(src, accum) {{src}}
+
+{
+ T_ACTIVATION({{DATA_TYPE}}, M0, N0, {{ACT}}, {{A_VAL}}, {{B_VAL}}, {{src}}, {{src}});
+}
+)_";
+ }
+ code += R"_(
+//------------------ END KERNEL {{meta_kernel_id}} ---------------------
+)_";
+ return code;
+}
+
+void ClTemplateActivation::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+{ // Registers src and dst in the variable table as Tensor_4D_t_Buffer arguments
+ vtable.declare_variable(
+ _src,
+ GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ comp_group.is_intermediate_tensor(_src), // Pass on whether the group treats src as an intermediate tensor
+ "src");
+
+ vtable.declare_variable(
+ _dst,
+ GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ comp_group.is_intermediate_tensor(_dst), // Pass on whether the group treats dst as an intermediate tensor
+ "dst");
+}
+
+TagLUT ClTemplateActivation::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+{
+ // Builds the tag table used to fill the {{...}} tags of the component code; comp_group supplies the group's dst tensor
+
+ TagLUT lut{};
+ // Arguments and global shared variables
+ lut["src"] = vtable.get_variable(_src);
+ lut["dst"] = vtable.get_variable(_dst);
+
+ const auto dst_argument = vtable.get_variable(comp_group.get_dst_tensors()[0]); // First destination tensor of the whole component group
+ lut["arg_dst"] = dst_argument.uniq_name;
+
+ // Local build options
+ lut["meta_kernel_id"] = id();
+ lut["DATA_TYPE"] = get_cl_type_from_data_type(_src->data_type());
+ lut["TENSOR_TYPE"] = "BUFFER";
+
+ const auto f_act = lower_string(string_from_activation_func(_attributes.activation())); // Lower-cased activation function name
+
+ lut["ACT"] = f_act;
+ lut["A_VAL"] = float_to_string_with_full_precision(_attributes.a());
+ lut["B_VAL"] = float_to_string_with_full_precision(_attributes.b());
+
+ return lut;
+}
+
+CLBuildOptions ClTemplateActivation::get_build_options(const ComponentGroup &comp_group) const
+{ // Emits the -DN0, -DM0 and -DPARTIAL_N0 build options
+ /// NOTE: For now tile sizes (n0, m0) are set by the execution window. This may change in the future
+ const auto root_window = comp_group.get_root_component()->template_writer()->get_window(); // Execution window of the group's root component
+ const unsigned int n0 = root_window.x().step();
+ const unsigned int m0 = root_window.y().step();
+ const unsigned int partial_store_n0 = _dst->dimension(0) % n0; // Remainder of dst dimension 0 when divided by n0
+
+ CLBuildOptions build_opts;
+ build_opts.add_option("-DN0=" + support::cpp11::to_string(n0));
+ build_opts.add_option("-DM0=" + support::cpp11::to_string(m0));
+ build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string(partial_store_n0));
+
+ return build_opts;
+}
+
+std::string ClTemplateActivation::get_config_id() const
+{ // Result has the form "activation_<data type>_<dim0>_<dim1>", all taken from the src tensor info
+ std::string config_id{};
+ config_id += "activation_";
+ config_id += lower_string(string_from_data_type(_src->data_type()));
+ config_id += "_";
+ config_id += support::cpp11::to_string(_src->dimension(0));
+ config_id += "_";
+ config_id += support::cpp11::to_string(_src->dimension(1));
+ return config_id;
+}
+
+std::set<std::string> ClTemplateActivation::get_headers_list() const
+{
+ return std::set<std::string>{ "helpers.h", "tile_helpers.h", "activation_float_helpers.h" }; // Fixed set of header names for this component
+}
+
+Window ClTemplateActivation::get_window() const
+{ // Window is computed over dst with an x step of n0; dst must already be initialized
+ ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
+ const unsigned int n0 = adjust_vec_size(16 / _src->element_size(), _src->dimension(0)); // NOTE(review): presumably 16-byte vectors limited by src dimension 0 - confirm against adjust_vec_size()
+ Window win = calculate_max_window(*_dst, Steps(n0));
+ return win.collapse(win, Window::DimZ);
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h
new file mode 100644
index 0000000000..22f8d428ab
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEACTIVATION
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEACTIVATION
+
+#include "arm_compute/core/experimental/Types.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+class ClTemplateActivation final : public IGpuTemplateComponentWriter // Template writer that generates the kernel code of the activation component
+{
+public:
+ using Attributes = ClComponentActivation::Attributes;
+
+ /** Constructor
+ *
+ * @param[in] id Component id
+ * @param[in] tensors Tensor arguments to the components
+ * @param[in] attributes Component attributes
+ */
+ ClTemplateActivation(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
+
+ /** Destructor */
+ ~ClTemplateActivation() override = default;
+
+ /** Prevent instances of this class from being copy constructed */
+ ClTemplateActivation(const ClTemplateActivation &activation) = delete;
+
+ /** Prevent instances of this class from being copied */
+ ClTemplateActivation &operator=(const ClTemplateActivation &activation) = delete;
+
+ /** Allow instances of this class to be move constructed */
+ ClTemplateActivation(ClTemplateActivation &&activation) = default;
+
+ /** Allow instances of this class to be moved */
+ ClTemplateActivation &operator=(ClTemplateActivation &&activation) = default;
+
+ /** Generate kernel component name */
+ std::string get_name() const override;
+
+ /** Generate kernel component code template
+ *
+ * @param[in] comp_group Component group of which the component is a part
+ *
+ * @return std::string Component code
+ */
+ std::string get_component_code(const ComponentGroup &comp_group) const override;
+
+ /** Declare all variables used by the component in the @p vtable
+ *
+ * @param[out] vtable Variable table
+ * @param[in] comp_group Component group of which the component is a part
+ */
+ void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
+
+ /** Generate the tag look-up table used to instantiate the component code.
+ *
+ * @param[in] vtable Variable table
+ * @param[in] comp_group Component group of which the component is a part
+ *
+ * @return TagLUT Tag lookup table
+ */
+ TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
+
+ /** Generate the build options used in the component
+ *
+ * @param[in] comp_group Component group of which the component is a part
+ *
+ * @return CLBuildOptions Build options
+ */
+ CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override;
+
+ /** Generate the component config id string used for tuning */
+ std::string get_config_id() const override;
+
+ /** Generate the header list used in the component */
+ std::set<std::string> get_headers_list() const override;
+
+ /** Generate the execution window for the component */
+ Window get_window() const override;
+
+private:
+ const ITensorInfo *_src; /**< Source tensor info (ACL_SRC entry of the tensor pack) */
+ const ITensorInfo *_dst; /**< Destination tensor info (ACL_DST entry of the tensor pack) */
+ Attributes _attributes; /**< Copy of the component attributes passed to the constructor */
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEACTIVATION */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h
index 48027a9b8d..77f83c9e87 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h
@@ -54,6 +54,8 @@ public:
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes,
const Settings &settings);
+ /** Destructor */
+ ~ClTemplateDirectConv2d() override = default;
/** Prevent instances of this class from being copy constructed */
ClTemplateDirectConv2d(const ClTemplateDirectConv2d &direct_conv2d) = delete;
/** Prevent instances of this class from being copied */
diff --git a/src/gpu/cl/ClKernelLibrary.cpp b/src/gpu/cl/ClKernelLibrary.cpp
index 5f2fcd61fa..f788bedc34 100644
--- a/src/gpu/cl/ClKernelLibrary.cpp
+++ b/src/gpu/cl/ClKernelLibrary.cpp
@@ -486,6 +486,14 @@ const std::map<std::string, std::string> ClKernelLibrary::_program_source_map =
{
#ifdef EMBEDDED_KERNELS
{
+ "activation_float_helpers.h",
+#include "./cl_kernels/activation_float_helpers.hembed"
+ },
+ {
+ "activation_quant_helpers.h",
+#include "./cl_kernels/activation_quant_helpers.hembed"
+ },
+ {
"common/activation_layer.cl",
#include "./cl_kernels/common/activation_layer.clembed"
},