aboutsummaryrefslogtreecommitdiff
diff options
context:
space:
mode:
-rw-r--r-- Android.bp 4
-rw-r--r-- arm_compute/dynamic_fusion/sketch/attributes/ClampAttributes.h 63
-rw-r--r-- arm_compute/dynamic_fusion/sketch/gpu/operators/GpuClamp.h 94
-rw-r--r-- filelist.json 4
-rw-r--r-- src/dynamic_fusion/sketch/attributes/ClampAttributes.cpp 58
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp 79
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h 120
-rw-r--r-- src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp 158
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h 2
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp 188
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h 118
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h 2
-rw-r--r-- src/gpu/cl/ClKernelLibrary.cpp 8
-rw-r--r-- tests/validation/dynamic_fusion/gpu/cl/Clamp.cpp 189
-rw-r--r-- tests/validation/fixtures/dynamic_fusion/operators/ClampFixture.h 189
-rw-r--r-- utils/TypePrinter.h 29
16 files changed, 1304 insertions, 1 deletions
diff --git a/Android.bp b/Android.bp
index 77ca59536b..f4d94f9508 100644
--- a/Android.bp
+++ b/Android.bp
@@ -589,6 +589,7 @@ cc_library_static {
"src/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.cpp",
"src/dynamic_fusion/sketch/OperatorAttributes.cpp",
"src/dynamic_fusion/sketch/attributes/CastAttributes.cpp",
+ "src/dynamic_fusion/sketch/attributes/ClampAttributes.cpp",
"src/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.cpp",
"src/dynamic_fusion/sketch/gpu/GpuKernelArgument.cpp",
"src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp",
@@ -598,6 +599,7 @@ cc_library_static {
"src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.cpp",
"src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp",
"src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp",
+ "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp",
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp",
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp",
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp",
@@ -605,10 +607,12 @@ cc_library_static {
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp",
+ "src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp",
"src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp",
"src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp",
+ "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp",
"src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp",
"src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp",
"src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp",
diff --git a/arm_compute/dynamic_fusion/sketch/attributes/ClampAttributes.h b/arm_compute/dynamic_fusion/sketch/attributes/ClampAttributes.h
new file mode 100644
index 0000000000..70e449a656
--- /dev/null
+++ b/arm_compute/dynamic_fusion/sketch/attributes/ClampAttributes.h
@@ -0,0 +1,63 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#ifndef ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_ATTRIBUTES_CLAMPATTRIBUTES
+#define ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_ATTRIBUTES_CLAMPATTRIBUTES
+
+#include "arm_compute/core/Types.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Attributes are backend-agnostic parameters (in addition to the input/output tensors) of an operator.
+ */
+
+/** Clamp attributes: the minimum and maximum clip values of a clamp operator */
+class ClampAttributes
+{
+public:
+ /** Set the minimum clip value; the returned reference to this object allows setter calls to be chained */
+ ClampAttributes &min_val(const float &min_val);
+
+ /** Get the minimum clip value (0 if it has never been set) */
+ float min_val() const;
+
+ /** Set the maximum clip value; the returned reference to this object allows setter calls to be chained */
+ ClampAttributes &max_val(const float &max_val);
+
+ /** Get the maximum clip value (0 if it has never been set) */
+ float max_val() const;
+
+private:
+ float _min_val{}; /**< Minimum clip value, value-initialized */
+ float _max_val{}; /**< Maximum clip value, value-initialized */
+};
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_ATTRIBUTES_CLAMPATTRIBUTES */
diff --git a/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuClamp.h b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuClamp.h
new file mode 100644
index 0000000000..66d6c5f300
--- /dev/null
+++ b/arm_compute/dynamic_fusion/sketch/gpu/operators/GpuClamp.h
@@ -0,0 +1,94 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUCLAMP
+#define ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUCLAMP
+
+#include "arm_compute/dynamic_fusion/sketch/attributes/ClampAttributes.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Forward declaration */
+class GpuWorkloadContext;
+class GpuWorkloadSketch;
+
+/** Operator interface of the clamp operator. */
+class GpuClamp final
+{
+public:
+ /** Attributes are a set of backend-agnostic parameters that define what an operator does */
+ using Attributes = ClampAttributes;
+
+ /** Create an operator and fuse it into the workload sketch.
+ * @note If @ref validate_op() fails, the creation also fails and may throw an error.
+ * @note If @ref validate_op() fails, @p sketch remains unchanged and valid.
+ *
+ * Valid data type configurations:
+ * |src |dst |
+ * |:--------------|:--------------|
+ * |F16 |F16 |
+ * |F32 |F32 |
+ *
+ * Valid data layouts:
+ * - All
+ *
+ * @param[in, out] sketch Workload sketch into which the operator will be fused
+ * @param[in] src Source tensor info. Data types supported: F16/F32.
+ * @param[out] dst Destination tensor info. Data types supported: F16/F32.
+ * If an uninitialized ITensorInfo is passed in, it will be auto-initialized from @p src
+ * @param[in] attributes Operator attributes
+ */
+ static void create_op(GpuWorkloadSketch &sketch,
+ ITensorInfo *src,
+ ITensorInfo *dst,
+ const Attributes &attributes);
+
+ /** Check if the operator configuration is supported, irrespective of fusion
+ *
+ * @param[in] context Workload context within which the operator is running
+ * @param[in] src Source tensor info. Data types supported: F16/F32.
+ * @param[in] dst Destination tensor info. Data types supported: F16/F32.
+ * If uninitialized, a local copy auto-initialized from @p src is validated instead; @p dst itself is never modified
+ * @param[in] attributes Operator attributes
+ */
+ static Status is_supported_op(const GpuWorkloadContext &context,
+ const ITensorInfo *src,
+ const ITensorInfo *dst,
+ const Attributes &attributes);
+
+ /** Validate the operator and check if it can be fused into the workload sketch.
+ * Similar to @ref GpuClamp::create_op(), except that neither @p sketch nor @p dst is modified
+ */
+ static Status validate_op(const GpuWorkloadSketch &sketch,
+ const ITensorInfo *src,
+ const ITensorInfo *dst,
+ const Attributes &attributes);
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif //ARM_COMPUTE_DYNAMIC_FUSION_SKETCH_GPU_OPERATORS_GPUCLAMP
diff --git a/filelist.json b/filelist.json
index 9b1db9b424..89ac6461b5 100644
--- a/filelist.json
+++ b/filelist.json
@@ -2200,6 +2200,7 @@
"src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp",
"src/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.cpp",
"src/dynamic_fusion/sketch/attributes/CastAttributes.cpp",
+ "src/dynamic_fusion/sketch/attributes/ClampAttributes.cpp",
"src/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.cpp",
"src/dynamic_fusion/sketch/OperatorAttributes.cpp",
"src/dynamic_fusion/sketch/gpu/GpuKernelArgument.cpp",
@@ -2210,6 +2211,7 @@
"src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.cpp",
"src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp",
"src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp",
+ "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp",
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp",
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp",
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp",
@@ -2217,9 +2219,11 @@
"src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp",
+ "src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp",
"src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp",
"src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp",
+ "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp",
"src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp",
"src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp",
"src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp",
diff --git a/src/dynamic_fusion/sketch/attributes/ClampAttributes.cpp b/src/dynamic_fusion/sketch/attributes/ClampAttributes.cpp
new file mode 100644
index 0000000000..b177f760df
--- /dev/null
+++ b/src/dynamic_fusion/sketch/attributes/ClampAttributes.cpp
@@ -0,0 +1,58 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+
+#include "arm_compute/dynamic_fusion/sketch/attributes/ClampAttributes.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+
+/** Record the lower clip bound and return this object so that setters can be chained */
+ClampAttributes &ClampAttributes::min_val(const float &min_val)
+{
+    this->_min_val = min_val;
+    return *this;
+}
+
+/** Report the lower clip bound currently stored */
+float ClampAttributes::min_val() const
+{
+    return this->_min_val;
+}
+
+/** Record the upper clip bound and return this object so that setters can be chained */
+ClampAttributes &ClampAttributes::max_val(const float &max_val)
+{
+    this->_max_val = max_val;
+    return *this;
+}
+
+/** Report the upper clip bound currently stored */
+float ClampAttributes::max_val() const
+{
+    return this->_max_val;
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp
new file mode 100644
index 0000000000..6eaa45c25d
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp
@@ -0,0 +1,79 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "ClComponentActivation.h"
+
+#include "src/core/CL/CLValidate.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+
+Status ClComponentActivation::validate(const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes)
+{ // Runs the checks below in order on the ACL_SRC / ACL_DST tensor infos; an empty Status is returned when none fails
+ ARM_COMPUTE_UNUSED(properties, attributes); // Neither argument is inspected by the checks below
+
+ const ITensorInfo *const src = tensors.get_const_tensor(TensorType::ACL_SRC);
+ const ITensorInfo *const dst = tensors.get_const_tensor(TensorType::ACL_DST);
+
+ ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst); // Both tensors must be present in the pack
+
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32); // Source restricted to 1-channel F16 / F32
+
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(src, dst); // Destination must agree with the source in shape, ...
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst); // ... data type ...
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, dst); // ... and data layout
+
+ // All tensor infos are initialized, i.e. neither shape has a total size of zero
+ ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().total_size() == 0);
+ ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0);
+
+ // Device requirements are met
+ ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
+ ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(dst);
+
+ return Status{}; // No check failed
+}
+
+/** Build the component and create the ClTemplateActivation writer later returned by template_writer().
+ *
+ * The arguments carry the same meaning as in @ref ClComponentActivation::validate()
+ */
+ClComponentActivation::ClComponentActivation(ComponentId                      id,
+                                             const Properties                &properties,
+                                             const ArgumentPack<ITensorInfo> &tensors,
+                                             const Attributes                &attributes)
+    : IGpuKernelComponent(id, properties, tensors),
+      _component_writer(std::make_unique<ClTemplateActivation>(id, tensors, attributes))
+{
+}
+
+/** Expose the template writer held by this component; ownership stays with the component */
+const IGpuTemplateComponentWriter *ClComponentActivation::template_writer() const
+{
+    const IGpuTemplateComponentWriter *const writer = _component_writer.get();
+    return writer;
+}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h
new file mode 100644
index 0000000000..d1b849ec73
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h
@@ -0,0 +1,120 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTACTIVATION
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTACTIVATION
+
+#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+
+namespace arm_compute
+{
+/** Forward declaration */
+class ITensorInfo;
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Forward declaration */
+template <typename T>
+class ArgumentPack;
+
+/** Forward declaration */
+class ClTemplateActivation;
+
+class ClComponentActivation final : public IGpuKernelComponent
+{
+public:
+ /** Attributes are a set of backend-agnostic parameters that define what a component does */
+ using Attributes = ActivationLayerInfo;
+
+ /** Validate the component
+ *
+ * @param[in] properties Component properties @ref Properties
+ * @param[in] tensors Tensor arguments to the component (passed by const reference, never modified)
+ * @param[in] attributes Component attributes @ref Attributes
+ *
+ * @return Status Validation results
+ *
+ * Tensor argument names:
+ * - ACL_SRC: Input
+ * - ACL_DST: Output
+ *
+ * Tensor argument constness:
+ * - ACL_SRC: Const
+ * - ACL_DST: Const
+ *
+ * Valid data layouts:
+ * - All
+ *
+ * Valid data type configurations:
+ * |ACL_SRC |ACL_DST |
+ * |:--------------|:--------------|
+ * |F16 |F16 |
+ * |F32 |F32 |
+ */
+ static Status validate(
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes);
+
+ /** Constructor
+ *
+ * Similar to @ref ClComponentActivation::validate()
+ */
+ ClComponentActivation(
+ ComponentId id,
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes);
+
+ /** Destructor */
+ ~ClComponentActivation() override = default;
+
+ /** Prevent instances of this class from being copy constructed */
+ ClComponentActivation(const ClComponentActivation &component) = delete;
+
+ /** Prevent instances of this class from being copied */
+ ClComponentActivation &operator=(const ClComponentActivation &component) = delete;
+
+ /** Allow instances of this class to be move constructed */
+ ClComponentActivation(ClComponentActivation &&component) = default;
+
+ /** Allow instances of this class to be moved */
+ ClComponentActivation &operator=(ClComponentActivation &&component) = default;
+
+ /** Get template writer for the component */
+ const IGpuTemplateComponentWriter *template_writer() const override;
+
+ /** Get component type */
+ GpuComponentType type() const override
+ {
+ return GpuComponentType::Simple;
+ }
+
+private:
+ std::unique_ptr<ClTemplateActivation> _component_writer; /**< Template writer owned by the component and handed out by template_writer() */
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTACTIVATION */
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp
new file mode 100644
index 0000000000..ffef6115d6
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp
@@ -0,0 +1,158 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuClamp.h"
+
+#include "arm_compute/core/experimental/Types.h"
+
+#include "src/core/helpers/AutoConfiguration.h"
+#include "src/dynamic_fusion/sketch/ArgumentPack.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h"
+
+#include "src/common/utils/Log.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+namespace
+{
+constexpr GpuOperatorType operator_type = GpuOperatorType::Simple;
+} // namespace
+
+/** Check whether a clamp with the given tensors and attributes is supported, irrespective of fusion.
+ *
+ * @p dst is never modified: a local copy is auto-initialized from @p src (when empty) and validated instead.
+ */
+Status GpuClamp::is_supported_op(const GpuWorkloadContext &context,
+                                 const ITensorInfo        *src,
+                                 const ITensorInfo        *dst,
+                                 const ClampAttributes    &attributes)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+    ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32);
+
+    const float lower_bound = attributes.min_val();
+    const float upper_bound = attributes.max_val();
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(upper_bound < lower_bound, "Maximum clamp value cannot be lower than minimum value");
+
+    // Validate against a copy of dst so that the caller's tensor info stays untouched
+    TensorInfo dst_candidate = *dst;
+    auto_init_if_empty(dst_candidate, *src->clone());
+
+    // OpenCL is the only Gpu language with a component implementation
+    if(context.gpu_language() != GpuLanguage::OpenCL)
+    {
+        ARM_COMPUTE_RETURN_ERROR_MSG("Unimplemented Gpu language");
+    }
+
+    // CLAMP is expressed as LU_BOUNDED_RELU: max_val is passed first and min_val second
+    const ClComponentActivation::Attributes activation{ ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, upper_bound, lower_bound };
+
+    // Validate the activation component that would implement the operator
+    ArgumentPack<ITensorInfo> component_tensors;
+    component_tensors.add_const_tensor(ACL_SRC, src);
+    component_tensors.add_const_tensor(ACL_DST, &dst_candidate);
+
+    const auto run_stage_props = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+    ARM_COMPUTE_RETURN_ON_ERROR(ClComponentActivation::validate(run_stage_props, component_tensors, activation));
+
+    return Status{};
+}
+
+/** Check that the operator can be fused into @p sketch and that its configuration is supported.
+ *
+ * Neither @p sketch nor @p dst is modified; an auto-initialized copy of @p dst is used for all checks.
+ */
+Status GpuClamp::validate_op(const GpuWorkloadSketch &sketch,
+                             const ITensorInfo       *src,
+                             const ITensorInfo       *dst,
+                             const ClampAttributes   &attributes)
+{
+    ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
+
+    // Tensors created from a sketch carry a valid id; anything else is rejected
+    ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !dst->has_valid_id());
+
+    // Work on a copy of dst, initialized from src when dst is still empty
+    TensorInfo dst_candidate = *dst;
+    auto_init_if_empty(dst_candidate, *src->clone());
+
+    // Fusion test: ask the operator group whether a new operator over these tensors could be added
+    ArgumentPack<ITensorInfo> op_tensors;
+    op_tensors.add_const_tensor(ACL_SRC, src);
+    op_tensors.add_const_tensor(ACL_DST, &dst_candidate);
+
+    const auto candidate_op = sketch.implementation().operator_group().new_operator(operator_type, op_tensors);
+    const bool is_fusable   = sketch.implementation().operator_group().try_add_operator(candidate_op);
+    ARM_COMPUTE_RETURN_ERROR_ON_MSG(!is_fusable,
+                                    "Operator fusion test failed. This operator cannot be fused into the workload");
+
+    // Finally check the configuration itself
+    return is_supported_op(*sketch.gpu_context(), src, &dst_candidate, attributes);
+}
+
+/** Validate the operator, auto-initialize @p dst if needed, then register the activation component
+ *  and the operator itself with @p sketch.
+ */
+void GpuClamp::create_op(GpuWorkloadSketch     &sketch,
+                         ITensorInfo           *src,
+                         ITensorInfo           *dst,
+                         const ClampAttributes &attributes)
+{
+    // Validation has to pass before anything is added to the sketch
+    ARM_COMPUTE_ERROR_THROW_ON(GpuClamp::validate_op(sketch, src, dst, attributes));
+    ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+    ARM_COMPUTE_LOG_PARAMS(src, dst, attributes);
+
+    // An empty dst takes its configuration from src
+    auto_init_if_empty(*dst, *src->clone());
+
+    // Components are added to the component graph of the sketch
+    GpuKernelComponentGraph &comp_graph = sketch.implementation().component_graph();
+
+    // CLAMP is expressed as LU_BOUNDED_RELU: max_val is passed first and min_val second
+    const ClComponentActivation::Attributes activation{ ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, attributes.max_val(), attributes.min_val() };
+
+    const auto *const gpu_ctx = sketch.implementation().context();
+
+    if(gpu_ctx->gpu_language() != GpuLanguage::OpenCL)
+    {
+        ARM_COMPUTE_ERROR("Unimplemented Gpu language");
+    }
+    else
+    {
+        // Add the activation component, scheduled for the Run stage
+        const auto run_stage_props = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+
+        ArgumentPack<ITensorInfo> component_tensors;
+        component_tensors.add_const_tensor(ACL_SRC, src);
+        component_tensors.add_const_tensor(ACL_DST, dst);
+        comp_graph.add_new_component<ClComponentActivation>(run_stage_props, component_tensors, activation);
+    }
+
+    // Register the operator with the operator group.
+    // This must only happen once all components have been added to the component graph.
+    ArgumentPack<ITensorInfo> op_tensors;
+    op_tensors.add_const_tensor(ACL_SRC, src);
+    op_tensors.add_const_tensor(ACL_DST, dst);
+
+    const auto fused_op = sketch.implementation().operator_group().new_operator(operator_type, op_tensors);
+    sketch.implementation().operator_group().add_operator(fused_op);
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h b/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h
index 328e942955..4a1fb142d6 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h
@@ -57,7 +57,7 @@ public:
* @param[in] id Component id
* @param[in] tensors Tensor arguments to the components
*/
- IGpuTemplateComponentWriter(ComponentId id, const ArgumentPack<ITensorInfo> tensors)
+ IGpuTemplateComponentWriter(ComponentId id, const ArgumentPack<ITensorInfo> &tensors)
: _id{ id }, _tensors{ tensors }
{
}
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp
new file mode 100644
index 0000000000..c3128ea552
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp
@@ -0,0 +1,188 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "ClTemplateActivation.h"
+
+#include "arm_compute/core/Utils.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "support/StringSupport.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+
+/** Store the attributes and cache the ACL_SRC / ACL_DST tensor infos of the argument pack; both must be present */
+ClTemplateActivation::ClTemplateActivation(ComponentId                      id,
+                                           const ArgumentPack<ITensorInfo> &tensors,
+                                           const Attributes                &attributes)
+    : IGpuTemplateComponentWriter{ id, tensors },
+      _src{},
+      _dst{},
+      _attributes{ attributes }
+{
+    // Look the tensors up through the pack held by the base class
+    const auto &stored_pack = this->tensors();
+
+    _src = stored_pack.get_const_tensor(TensorType::ACL_SRC);
+    _dst = stored_pack.get_const_tensor(TensorType::ACL_DST);
+    ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
+}
+
+/** Name of the component: always "activation" */
+std::string ClTemplateActivation::get_name() const
+{
+    return std::string("activation");
+}
+
+std::string ClTemplateActivation::get_component_code(const ComponentGroup &comp_group) const
+{ // Assembles the kernel code template of this component; the {{tag}} placeholders match the keys filled in by get_tag_lut()
+ std::string code;
+ const bool is_root = (comp_group.get_root_component()->id() == this->id()); // True when this component is the root of comp_group
+
+ code = R"_(
+//------------------ START KERNEL {{meta_kernel_id}} ---------------------
+)_";
+ if(is_root) // Root: load src into the dst tile, apply the activation on it, then fill g_dst_indirect_y
+ {
+ code += R"_(
+// IN(src) {{src}}
+// OUT(dst, accum) {{dst}}
+
+TILE({{DATA_TYPE}}, M0, N0, {{dst}});
+TILE(uint, M0, 1, g_dst_indirect_y);
+{
+ {{src}}_offset_first_element_in_bytes += g_ind_2 * {{src}}_stride_z;
+
+ T_LOAD({{DATA_TYPE}}, M0, N0, {{TENSOR_TYPE}}, {{src}}, g_ind_0, g_ind_1, 1, {{src}}_stride_y, {{dst}});
+
+ T_ACTIVATION({{DATA_TYPE}}, M0, N0, {{ACT}}, {{A_VAL}}, {{B_VAL}}, {{dst}}, {{dst}});
+}
+
+LOOP_UNROLLING(int, i, 0, 1, M0,
+{
+ g_dst_indirect_y[i].v = (uint)min((int)(g_ind_1 + i), (int)({{arg_dst}}_w) - 1);
+ g_dst_indirect_y[i].v += (int)(g_ind_2 % {{arg_dst}}_h) * (int)({{arg_dst}}_w);
+ g_dst_indirect_y[i].v += (int)(g_ind_2 / {{arg_dst}}_h) * (int)({{arg_dst}}_w * {{arg_dst}}_h);
+})
+)_";
+ }
+ else // Non-root: apply the activation in place on the src tile
+ {
+ code += R"_(
+// IN/OUT(src, accum) {{src}}
+
+{
+ T_ACTIVATION({{DATA_TYPE}}, M0, N0, {{ACT}}, {{A_VAL}}, {{B_VAL}}, {{src}}, {{src}});
+}
+)_";
+ }
+ code += R"_(
+//------------------ END KERNEL {{meta_kernel_id}} ---------------------
+)_";
+ return code; // START marker + root or non-root body + END marker
+}
+
+/** Declare the source and destination tensors in @p vtable as Tensor_4D_t_Buffer arguments,
+ *  flagging each one with whether @p comp_group treats it as an intermediate tensor.
+ */
+void ClTemplateActivation::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+{
+    const auto src_is_intermediate = comp_group.is_intermediate_tensor(_src);
+    vtable.declare_variable(_src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), src_is_intermediate, "src");
+
+    const auto dst_is_intermediate = comp_group.is_intermediate_tensor(_dst);
+    vtable.declare_variable(_dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer), dst_is_intermediate, "dst");
+}
+
+/** Build the look-up table that gives a value to every tag used in the component code.
+ *
+ * @param[in] vtable     Variable table queried for the source tensor, the destination tensor and
+ *                       the first destination tensor of @p comp_group
+ * @param[in] comp_group Component group; its first destination tensor supplies the "arg_dst" tag
+ *
+ * @return The populated tag look-up table
+ */
+TagLUT ClTemplateActivation::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+{
+    TagLUT lut{};
+    // Arguments and global shared variables
+    lut["src"] = vtable.get_variable(_src);
+    lut["dst"] = vtable.get_variable(_dst);
+
+    // The root variant of the component code addresses the group's destination through "arg_dst"
+    const auto dst_argument = vtable.get_variable(comp_group.get_dst_tensors()[0]);
+    lut["arg_dst"]          = dst_argument.uniq_name;
+
+    // Local build options
+    lut["meta_kernel_id"] = id();
+    lut["DATA_TYPE"]      = get_cl_type_from_data_type(_src->data_type());
+    lut["TENSOR_TYPE"]    = "BUFFER";
+
+    // Activation function name in lower case, followed by its two parameters
+    const auto f_act = lower_string(string_from_activation_func(_attributes.activation()));
+
+    lut["ACT"]   = f_act;
+    lut["A_VAL"] = float_to_string_with_full_precision(_attributes.a());
+    lut["B_VAL"] = float_to_string_with_full_precision(_attributes.b());
+
+    return lut;
+}
+
+/** Produce the N0 / M0 / PARTIAL_N0 build options from the execution window of the root component */
+CLBuildOptions ClTemplateActivation::get_build_options(const ComponentGroup &comp_group) const
+{
+    /// NOTE: For now tile sizes (n0, m0) are set by the execution window. This may change in the future
+    const auto         exec_window = comp_group.get_root_component()->template_writer()->get_window();
+    const unsigned int tile_n0     = exec_window.x().step();
+    const unsigned int tile_m0     = exec_window.y().step();
+    // Remainder of the first destination dimension after splitting it into tiles of tile_n0
+    const unsigned int leftover_n0 = _dst->dimension(0) % tile_n0;
+
+    const std::string n0_option         = "-DN0=" + support::cpp11::to_string(tile_n0);
+    const std::string m0_option         = "-DM0=" + support::cpp11::to_string(tile_m0);
+    const std::string partial_n0_option = "-DPARTIAL_N0=" + support::cpp11::to_string(leftover_n0);
+
+    CLBuildOptions options;
+    options.add_option(n0_option);
+    options.add_option(m0_option);
+    options.add_option(partial_n0_option);
+
+    return options;
+}
+
+/** Compose the config id: "activation_<data type in lower case>_<src dimension 0>_<src dimension 1>" */
+std::string ClTemplateActivation::get_config_id() const
+{
+    const std::string data_type_tag = lower_string(string_from_data_type(_src->data_type()));
+    const std::string dim0_tag      = support::cpp11::to_string(_src->dimension(0));
+    const std::string dim1_tag      = support::cpp11::to_string(_src->dimension(1));
+
+    return "activation_" + data_type_tag + "_" + dim0_tag + "_" + dim1_tag;
+}
+
+/** Names of the header files listed for this component */
+std::set<std::string> ClTemplateActivation::get_headers_list() const
+{
+    return { "helpers.h", "tile_helpers.h", "activation_float_helpers.h" };
+}
+
+/** Compute the execution window over the destination tensor, collapsed from DimZ.
+ *
+ * The step along x is derived from the element size and first dimension of the source tensor.
+ */
+Window ClTemplateActivation::get_window() const
+{
+    ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
+
+    const unsigned int x_step      = adjust_vec_size(16 / _src->element_size(), _src->dimension(0));
+    Window             full_window = calculate_max_window(*_dst, Steps(x_step));
+
+    return full_window.collapse(full_window, Window::DimZ);
+}
+
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h
new file mode 100644
index 0000000000..22f8d428ab
--- /dev/null
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h
@@ -0,0 +1,118 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEACTIVATION
+#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEACTIVATION
+
+#include "arm_compute/core/experimental/Types.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
+#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
+
+namespace arm_compute
+{
+namespace experimental
+{
+namespace dynamic_fusion
+{
+/** Template component writer that reuses the attribute type of @ref ClComponentActivation.
+ *
+ * Implements the IGpuTemplateComponentWriter interface: component name and code, variable
+ * declaration, tag look-up table, build options, config id, header list and execution window.
+ * The class is move-only (copy construction and copy assignment are deleted).
+ */
+class ClTemplateActivation final : public IGpuTemplateComponentWriter
+{
+public:
+ using Attributes = ClComponentActivation::Attributes;
+
+ /** Constructor
+ *
+ * @param[in] id Component id
+ * @param[in] tensors Tensor arguments to the components
+ * @param[in] attributes Component attributes
+ */
+ ClTemplateActivation(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
+
+ /** Destructor */
+ ~ClTemplateActivation() override = default;
+
+ /** Prevent instances of this class from being copy constructed */
+ ClTemplateActivation(const ClTemplateActivation &activation) = delete;
+
+ /** Prevent instances of this class from being copied */
+ ClTemplateActivation &operator=(const ClTemplateActivation &activation) = delete;
+
+ /** Allow instances of this class to be move constructed */
+ ClTemplateActivation(ClTemplateActivation &&activation) = default;
+
+ /** Allow instances of this class to be moved */
+ ClTemplateActivation &operator=(ClTemplateActivation &&activation) = default;
+
+ /** Generate kernel component name */
+ std::string get_name() const override;
+
+ /** Generate kernel component code template
+ *
+ * @param[in] comp_group Component group of which the component is a part of
+ *
+ * @return std::string Component code
+ */
+ std::string get_component_code(const ComponentGroup &comp_group) const override;
+
+ /** Declare all variables used by the component in the @p vtable
+ *
+ * @param[out] vtable Variable table
+ * @param[in] comp_group Component group of which the component is a part of
+ */
+ void declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
+
+ /** Generate the tag look-up table used to instantiate the component code.
+ *
+ * @param[in] vtable Variable table
+ * @param[in] comp_group Component group of which the component is a part of
+ *
+ * @return TagLUT Tag lookup table
+ */
+ TagLUT get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const override;
+
+ /** Generate the build options used in the component
+ *
+ * @param[in] comp_group Component group of which the component is a part of
+ *
+ * @return CLBuildOptions Build options
+ */
+ CLBuildOptions get_build_options(const ComponentGroup &comp_group) const override;
+
+ /** Generate the component config id string used for tuning */
+ std::string get_config_id() const override;
+
+ /** Generate the header list used in the component */
+ std::set<std::string> get_headers_list() const override;
+
+ /** Generate the execution window for the component */
+ Window get_window() const override;
+
+private:
+ /** Source tensor info; read by get_config_id() and get_window().
+ * NOTE(review): held as a raw pointer, so presumably not owned by this class - confirm against the constructor. */
+ const ITensorInfo *_src;
+ /** Destination tensor info; read by get_build_options() and get_window().
+ * NOTE(review): held as a raw pointer, so presumably not owned by this class - confirm against the constructor. */
+ const ITensorInfo *_dst;
+ /** Component attributes, stored by value.
+ * NOTE(review): presumably a copy of the constructor's @p attributes argument - confirm. */
+ Attributes _attributes;
+};
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
+#endif /* SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEACTIVATION */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h
index 48027a9b8d..77f83c9e87 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h
@@ -54,6 +54,8 @@ public:
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes,
const Settings &settings);
+ /** Destructor */
+ ~ClTemplateDirectConv2d() override = default;
/** Prevent instances of this class from being copy constructed */
ClTemplateDirectConv2d(const ClTemplateDirectConv2d &direct_conv2d) = delete;
/** Prevent instances of this class from being copied */
diff --git a/src/gpu/cl/ClKernelLibrary.cpp b/src/gpu/cl/ClKernelLibrary.cpp
index 5f2fcd61fa..f788bedc34 100644
--- a/src/gpu/cl/ClKernelLibrary.cpp
+++ b/src/gpu/cl/ClKernelLibrary.cpp
@@ -486,6 +486,14 @@ const std::map<std::string, std::string> ClKernelLibrary::_program_source_map =
{
#ifdef EMBEDDED_KERNELS
{
+ "activation_float_helpers.h",
+#include "./cl_kernels/activation_float_helpers.hembed"
+ },
+ {
+ "activation_quant_helpers.h",
+#include "./cl_kernels/activation_quant_helpers.hembed"
+ },
+ {
"common/activation_layer.cl",
#include "./cl_kernels/common/activation_layer.clembed"
},
diff --git a/tests/validation/dynamic_fusion/gpu/cl/Clamp.cpp b/tests/validation/dynamic_fusion/gpu/cl/Clamp.cpp
new file mode 100644
index 0000000000..947201ff97
--- /dev/null
+++ b/tests/validation/dynamic_fusion/gpu/cl/Clamp.cpp
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#include "arm_compute/core/Types.h"
+#include "arm_compute/dynamic_fusion/sketch/attributes/ClampAttributes.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuClamp.h"
+#include "arm_compute/runtime/CL/CLTensor.h"
+
+#include "tests/CL/CLAccessor.h"
+#include "tests/datasets/ShapeDatasets.h"
+#include "tests/framework/Asserts.h"
+#include "tests/framework/Macros.h"
+#include "tests/framework/datasets/Datasets.h"
+#include "tests/validation/Validation.h"
+#include "tests/validation/fixtures/dynamic_fusion/operators/ClampFixture.h"
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+namespace
+{
+// Absolute tolerance passed to validate() by every fixture test case in this file
+// (both the FP16 and the FP32 suites use the same value).
+constexpr float epsilon = 1e-6f;
+constexpr AbsoluteTolerance<float> tolerance(epsilon);
+} // namespace
+
+TEST_SUITE(CL)
+TEST_SUITE(DYNAMIC_FUSION)
+TEST_SUITE(CLAMP)
+// *INDENT-OFF*
+// clang-format off
+// Checks GpuClamp::validate_op() against a table of configurations. The five datasets are zipped,
+// so the n-th entry of each dataset forms the n-th configuration:
+//   0: F32, matching shapes, min < max           -> accepted
+//   1: F16, matching shapes, min < max           -> accepted
+//   2: input F32 / output F16                    -> rejected
+//   3: input 27x13x2 / output 30x11x2            -> rejected
+//   4: matching shapes and types, min > max      -> rejected
+// Each rejected configuration differs from an accepted one in exactly one respect, so that a
+// regression in a single check cannot be masked by another check failing first.
+DATA_TEST_CASE(Validate, framework::DatasetMode::ALL, zip(zip(zip(zip(
+ framework::dataset::make("InputInfo", { TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16),
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching data types
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Mismatching shapes
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Minimum value larger than maximum value
+ }),
+ framework::dataset::make("OutputInfo",{ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(32U, 13U, 2U), 1, DataType::F16),
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F16),
+ TensorInfo(TensorShape(30U, 11U, 2U), 1, DataType::F32),
+ TensorInfo(TensorShape(27U, 13U, 2U), 1, DataType::F32), // Same shape/type as the input: only min > max may cause the rejection
+ })),
+ framework::dataset::make("MinVal", { 0.2f,
+ 1.5f,
+ 0.1f,
+ 3.0f,
+ 9.0f,
+ })),
+ framework::dataset::make("MaxVal", { 0.5f,
+ 2.0f,
+ 1.0f,
+ 4.0f,
+ 1.0f,
+ })),
+ framework::dataset::make("Expected", { true, true, false, false, false })),
+ input_info, output_info, min_val, max_val, expected)
+{
+ // Create a new workload sketch
+ CLCompileContext cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+ GpuWorkloadContext gpu_ctx{ &cl_compile_ctx };
+ GpuWorkloadSketch sketch{ &gpu_ctx };
+
+ // Fuse Clamp
+ const TensorInfo src_info = sketch.create_tensor_info(input_info);
+ const TensorInfo dst_info = sketch.create_tensor_info(output_info);
+
+ ClampAttributes attributes {};
+ attributes.min_val(min_val)
+ .max_val(max_val);
+
+ // validate_op() returns a status object; its bool conversion is compared with the expectation
+ const bool res = static_cast<bool>(GpuClamp::validate_op(sketch, &src_info, &dst_info, attributes));
+ ARM_COMPUTE_EXPECT(res == expected, framework::LogLevel::ERRORS);
+}
+// clang-format on
+// *INDENT-ON*
+
+// Shorthand for the clamp validation fixture bound to CLTensor / CLAccessor / GpuClamp;
+// only the element type T is left open for the FP16 and FP32 suites below.
+template <typename T>
+using DynamicFusionClampOpFixture = DynamicFusionClampValidationFixture<CLTensor, CLAccessor, GpuClamp, T>;
+
+TEST_SUITE(Float)
+TEST_SUITE(FP16)
+// One clamp operator ("Fuse" = false) over datasets::SmallShapes(), clamping to [0.1, 0.6], F16 data.
+FIXTURE_DATA_TEST_CASE(RunSmallOneOp,
+ DynamicFusionClampOpFixture<half>,
+ framework::DatasetMode::ALL,
+ combine(combine(combine(datasets::SmallShapes(),
+ framework::dataset::make("ClampAttributes", { ClampAttributes().min_val(0.1f).max_val(0.6f) })),
+ framework::dataset::make("Fuse", { false })),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance);
+}
+
+// One clamp operator ("Fuse" = false) over datasets::Small5dShapes(), clamping to [0.1, 0.6], F16 data.
+FIXTURE_DATA_TEST_CASE(RunSmall5dOneOp,
+ DynamicFusionClampOpFixture<half>,
+ framework::DatasetMode::ALL,
+ combine(combine(combine(datasets::Small5dShapes(),
+ framework::dataset::make("ClampAttributes", { ClampAttributes().min_val(0.1f).max_val(0.6f) })),
+ framework::dataset::make("Fuse", { false })),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance);
+}
+
+// "Fuse" = true: the fixture adds a second clamp operator fed by the output of the first one.
+// Runs over datasets::SmallShapes(), clamping to [0.2, 0.4], F16 data.
+FIXTURE_DATA_TEST_CASE(RunSmallTwoOps,
+ DynamicFusionClampOpFixture<half>,
+ framework::DatasetMode::ALL,
+ combine(combine(combine(datasets::SmallShapes(),
+ framework::dataset::make("ClampAttributes", { ClampAttributes().min_val(0.2f).max_val(0.4f) })),
+ framework::dataset::make("Fuse", { true })),
+ framework::dataset::make("DataType", DataType::F16)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance);
+}
+
+TEST_SUITE_END() // FP16
+
+TEST_SUITE(FP32)
+// One clamp operator ("Fuse" = false) over datasets::SmallShapes(), clamping to [0.3, 0.7], F32 data.
+FIXTURE_DATA_TEST_CASE(RunSmallOneOp,
+ DynamicFusionClampOpFixture<float>,
+ framework::DatasetMode::ALL,
+ combine(combine(combine(datasets::SmallShapes(),
+ framework::dataset::make("ClampAttributes", { ClampAttributes().min_val(0.3f).max_val(0.7f) })),
+ framework::dataset::make("Fuse", { false })),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance);
+}
+
+// One clamp operator ("Fuse" = false) over datasets::Small5dShapes(), clamping to [0.3, 0.7], F32 data.
+FIXTURE_DATA_TEST_CASE(RunSmall5dOneOp,
+ DynamicFusionClampOpFixture<float>,
+ framework::DatasetMode::ALL,
+ combine(combine(combine(datasets::Small5dShapes(),
+ framework::dataset::make("ClampAttributes", { ClampAttributes().min_val(0.3f).max_val(0.7f) })),
+ framework::dataset::make("Fuse", { false })),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance);
+}
+
+// "Fuse" = true: the fixture adds a second clamp operator fed by the output of the first one.
+// Runs over datasets::SmallShapes(), clamping to [0.1, 0.9], F32 data.
+FIXTURE_DATA_TEST_CASE(RunSmallTwoOps,
+ DynamicFusionClampOpFixture<float>,
+ framework::DatasetMode::ALL,
+ combine(combine(combine(datasets::SmallShapes(),
+ framework::dataset::make("ClampAttributes", { ClampAttributes().min_val(0.1f).max_val(0.9f) })),
+ framework::dataset::make("Fuse", { true })),
+ framework::dataset::make("DataType", DataType::F32)))
+{
+ // Validate output
+ validate(CLAccessor(_target), _reference, tolerance);
+}
+
+TEST_SUITE_END() // FP32
+TEST_SUITE_END() // Float
+
+TEST_SUITE_END() // CLAMP
+TEST_SUITE_END() // DYNAMIC_FUSION
+TEST_SUITE_END() // CL
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
diff --git a/tests/validation/fixtures/dynamic_fusion/operators/ClampFixture.h b/tests/validation/fixtures/dynamic_fusion/operators/ClampFixture.h
new file mode 100644
index 0000000000..dbac29fd22
--- /dev/null
+++ b/tests/validation/fixtures/dynamic_fusion/operators/ClampFixture.h
@@ -0,0 +1,189 @@
+/*
+ * Copyright (c) 2022 Arm Limited.
+ *
+ * SPDX-License-Identifier: MIT
+ *
+ * Permission is hereby granted, free of charge, to any person obtaining a copy
+ * of this software and associated documentation files (the "Software"), to
+ * deal in the Software without restriction, including without limitation the
+ * rights to use, copy, modify, merge, publish, distribute, sublicense, and/or
+ * sell copies of the Software, and to permit persons to whom the Software is
+ * furnished to do so, subject to the following conditions:
+ *
+ * The above copyright notice and this permission notice shall be included in all
+ * copies or substantial portions of the Software.
+ *
+ * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR
+ * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY,
+ * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE
+ * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER
+ * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM,
+ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE
+ * SOFTWARE.
+ */
+#ifndef TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_CLAMPFIXTURE
+#define TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_CLAMPFIXTURE
+
+#include "arm_compute/core/CL/CLKernelLibrary.h"
+#include "arm_compute/core/TensorInfo.h"
+#include "arm_compute/core/Types.h"
+#include "arm_compute/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.h"
+#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+
+#include "tests/framework/Fixture.h"
+#include "tests/validation/reference/ActivationLayer.h"
+
+using namespace arm_compute::experimental::dynamic_fusion;
+
+namespace arm_compute
+{
+namespace test
+{
+namespace validation
+{
+/** Validation fixture for a dynamic fusion clamp operator.
+ *
+ * The target is produced by a workload sketch holding one clamp operator, or two chained clamp
+ * operators when fusing is requested. The reference is a single LU_BOUNDED_RELU activation
+ * applied to an identically filled input.
+ *
+ * @tparam TensorType   Tensor type used for the target tensors
+ * @tparam AccessorType Accessor wrapped around a TensorType in order to fill it
+ * @tparam FunctionType Operator type providing a static create_op()
+ * @tparam T            Element type of the tensors
+ */
+template <typename TensorType, typename AccessorType, typename FunctionType, typename T>
+class DynamicFusionClampValidationFixture : public framework::Fixture
+{
+public:
+    /** Compute the target and the reference tensors for one test configuration.
+     *
+     * @param[in] shape      Shape of every tensor in the test
+     * @param[in] attributes Clamp attributes (minimum and maximum value)
+     * @param[in] fuse       When true, a second clamp operator is chained after the first one
+     * @param[in] data_type  Data type of every tensor in the test
+     */
+    template <typename...>
+    void setup(TensorShape shape, ClampAttributes attributes, bool fuse, DataType data_type)
+    {
+        _fuse       = fuse;
+        _attributes = attributes;
+        _data_type  = data_type;
+
+        // CLAMP is implemented as LU_BOUNDED_RELU with the alpha and beta variables swapped,
+        // hence the (max, min) argument order.
+        const ActivationLayerInfo reference_act{ ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, attributes.max_val(), attributes.min_val() };
+
+        _target    = compute_target(shape, attributes);
+        _reference = compute_reference(shape, reference_act);
+    }
+
+protected:
+    /** Build the list of values used to fill the input tensors.
+     *
+     * The candidates come from splitting [min, max] into two partitions:
+     *  - lower partition: min, min + delta, lower quarter, center - delta
+     *  - upper partition: center, center + delta, upper quarter, max - delta, max
+     * Candidates that end up outside [min, max] after adding or subtracting delta are dropped.
+     *
+     * @param[in] min Lower end of the range
+     * @param[in] max Upper end of the range
+     *
+     * @return The in-range candidates, in the order listed above
+     */
+    std::vector<T> get_boundary_values(T min, T max)
+    {
+        const auto step     = is_data_type_float(_data_type) ? T(0.1f) : T(1);
+        const auto midpoint = (min + max) / 2;
+        const auto lower_q  = (min + midpoint) / 2;
+        const auto upper_q  = (midpoint + max) / 2;
+
+        const std::vector<T> candidates
+        {
+            // lower partition
+            min,
+            static_cast<T>(min + step),
+            static_cast<T>(lower_q),
+            static_cast<T>(midpoint - step),
+            // upper partition
+            static_cast<T>(midpoint),
+            static_cast<T>(midpoint + step),
+            static_cast<T>(upper_q),
+            static_cast<T>(max - step),
+            max
+        };
+
+        std::vector<T> boundary_values{};
+        for(const T &candidate : candidates)
+        {
+            if(candidate >= min && candidate <= max)
+            {
+                boundary_values.push_back(candidate);
+            }
+        }
+
+        return boundary_values;
+    }
+
+    /** Fill a tensor with the boundary values of the LU_BOUNDED_RELU test bounds.
+     *
+     * @param[in,out] tensor Tensor (or accessor) to fill
+     */
+    template <typename U>
+    void fill(U &&tensor)
+    {
+        float bound_lo = 0;
+        float bound_hi = 0;
+        std::tie(bound_lo, bound_hi) = get_activation_layer_test_bounds<T>(ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, _data_type);
+
+        const std::vector<T> values = get_boundary_values(static_cast<T>(bound_lo), static_cast<T>(bound_hi));
+        library->fill_static_values(tensor, values);
+    }
+
+    /** Build, configure and run the workload under test.
+     *
+     * @param[in] shape      Shape of every tensor
+     * @param[in] attributes Clamp attributes handed to each create_op() call
+     *
+     * @return The output of the last operator in the sketch
+     */
+    TensorType compute_target(const TensorShape &shape, ClampAttributes attributes)
+    {
+        // Workload sketch and the contexts it depends on
+        CLCompileContext   cl_compile_ctx = CLKernelLibrary::get().get_compile_context();
+        GpuWorkloadContext gpu_ctx{ &cl_compile_ctx };
+        GpuWorkloadSketch  sketch{ &gpu_ctx };
+
+        // Sketch tensors; dst_1_info is only populated when a second operator is added
+        TensorInfo src_info   = sketch.create_tensor_info(TensorInfo(shape, 1, _data_type));
+        TensorInfo dst_0_info = sketch.create_tensor_info(TensorInfo(shape, 1, _data_type));
+        TensorInfo dst_1_info;
+
+        FunctionType::create_op(sketch, &src_info, &dst_0_info, attributes);
+        if(_fuse)
+        {
+            dst_1_info = sketch.create_tensor_info(shape, 1, _data_type);
+            FunctionType::create_op(sketch, &dst_0_info, &dst_1_info, attributes);
+        }
+
+        ClWorkloadRuntime runtime;
+        runtime.configure(sketch);
+
+        // User tensors
+        TensorType input{};
+        TensorType first_output{};
+        TensorType second_output{};
+
+        input.allocator()->init(src_info);
+        first_output.allocator()->init(dst_0_info);
+        if(_fuse)
+        {
+            second_output.allocator()->init(dst_1_info);
+        }
+
+        input.allocator()->allocate();
+        first_output.allocator()->allocate();
+        if(_fuse)
+        {
+            second_output.allocator()->allocate();
+        }
+
+        fill(AccessorType(input));
+
+        // Single operator: its output is the result
+        if(!_fuse)
+        {
+            runtime.run({ &input, &first_output });
+            return first_output;
+        }
+
+        // Two operators: only the input and the final output are handed to the runtime
+        runtime.run({ &input, &second_output });
+        return second_output;
+    }
+
+    /** Compute the reference output with the reference activation layer.
+     *
+     * @param[in] shape    Shape of the input tensor
+     * @param[in] act_info Activation info describing the clamp
+     *
+     * @return Reference output
+     */
+    SimpleTensor<T> compute_reference(const TensorShape &shape, ActivationLayerInfo act_info)
+    {
+        SimpleTensor<T> input{ shape, _data_type, 1, _quantization_info };
+        fill(input);
+
+        return reference::activation_layer<T>(input, act_info, _quantization_info);
+    }
+
+protected:
+    QuantizationInfo _quantization_info{};
+    ClampAttributes  _attributes{};
+    bool             _fuse{ false };
+    DataType         _data_type{};
+    TensorType       _target{};
+    SimpleTensor<T>  _reference{};
+};
+} // namespace validation
+} // namespace test
+} // namespace arm_compute
+#endif /* TESTS_VALIDATION_FIXTURES_DYNAMIC_FUSION_OPERATORS_CLAMPFIXTURE */
diff --git a/utils/TypePrinter.h b/utils/TypePrinter.h
index 515e568657..9f20b38b96 100644
--- a/utils/TypePrinter.h
+++ b/utils/TypePrinter.h
@@ -40,6 +40,7 @@
#include "arm_compute/core/experimental/PostOps.h"
#include "arm_compute/dynamic_fusion/sketch/OperatorAttributes.h"
#include "arm_compute/dynamic_fusion/sketch/attributes/CastAttributes.h"
+#include "arm_compute/dynamic_fusion/sketch/attributes/ClampAttributes.h"
#include "arm_compute/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.h"
#include "arm_compute/runtime/CL/CLTunerTypes.h"
#include "arm_compute/runtime/CL/CLTypes.h"
@@ -3495,6 +3496,34 @@ inline std::string to_string(const experimental::dynamic_fusion::DepthwiseConv2d
return str.str();
}
+/** Stream an arm_compute::experimental::dynamic_fusion::ClampAttributes object.
+ *
+ * Writes "ClampAttributes=[Min value=<min>, Max value=<max>]".
+ *
+ * @param[out] os         Output stream.
+ * @param[in]  clamp_attr arm_compute::experimental::dynamic_fusion::ClampAttributes type to output.
+ *
+ * @return Modified output stream.
+ */
+inline ::std::ostream &operator<<(::std::ostream &os, const experimental::dynamic_fusion::ClampAttributes &clamp_attr)
+{
+    os << "ClampAttributes=" << "[";
+    os << "Min value=" << clamp_attr.min_val() << ", ";
+    os << "Max value=" << clamp_attr.max_val() << "]";
+    return os;
+}
+/** Convert an arm_compute::experimental::dynamic_fusion::ClampAttributes object to a string.
+ *
+ * The text is whatever the stream insertion operator for ClampAttributes produces.
+ *
+ * @param[in] clamp_attr arm_compute::experimental::dynamic_fusion::ClampAttributes type to output.
+ *
+ * @return Formatted string.
+ */
+inline std::string to_string(const experimental::dynamic_fusion::ClampAttributes &clamp_attr)
+{
+    std::stringstream formatted;
+    formatted << clamp_attr;
+    return formatted.str();
+}
+
} // namespace arm_compute
#endif /* __ARM_COMPUTE_TYPE_PRINTER_H__ */