about summary refs log tree commit diff
path: root/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp
diff options
context:
space:
mode:
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp')
-rw-r--r-- src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp 38
1 files changed, 21 insertions, 17 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp
index ffc4553a7d..c87b282aec 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp
@@ -22,13 +22,14 @@
* SOFTWARE.
*/
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.h"
+
#include "arm_compute/core/Error.h"
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h"
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h"
#include "src/common/utils/Log.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h"
#include "src/dynamic_fusion/sketch/gpu/GpuOperatorProperties.h"
#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
@@ -52,7 +53,7 @@ Status GpuSoftmax::is_supported_op(const GpuWorkloadContext &context,
TensorInfo dst_info_to_validate;
// Auto initialize dst tensor info
- if(dst != nullptr)
+ if (dst != nullptr)
{
dst_info_to_validate = *dst;
}
@@ -61,11 +62,12 @@ Status GpuSoftmax::is_supported_op(const GpuWorkloadContext &context,
auto_init_if_empty(dst_info_to_validate, *src->clone());
}
// Check components
- if(context.gpu_language() == GpuLanguage::OpenCL)
+ if (context.gpu_language() == GpuLanguage::OpenCL)
{
const auto cl_compile_ctx = context.cl_compile_context();
ARM_COMPUTE_RETURN_ERROR_ON(cl_compile_ctx == nullptr);
- const KernelProperties properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ const KernelProperties properties =
+ IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
TensorShape logits_sum_shape = src->tensor_shape();
TensorInfo logits(src->clone()->set_tensor_shape(logits_sum_shape));
@@ -86,7 +88,8 @@ Status GpuSoftmax::is_supported_op(const GpuWorkloadContext &context,
arguments_norm.add_const_tensor(ACL_SRC_1, &sum);
arguments_norm.add_const_tensor(ACL_DST_0, &dst_info_to_validate);
- ARM_COMPUTE_RETURN_ON_ERROR(ClComponentLogits1DMaxShiftExpSum::validate(properties, arguments_exp_sum, attributes));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ ClComponentLogits1DMaxShiftExpSum::validate(properties, arguments_exp_sum, attributes));
ARM_COMPUTE_RETURN_ON_ERROR(ClComponentLogits1DNorm::validate(properties, arguments_norm, attributes));
}
else
@@ -105,14 +108,16 @@ Status GpuSoftmax::validate_op(const GpuWorkloadSketch &sketch,
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !dst->has_valid_id());
ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->num_dimensions() > 4, "Only up to 4 dimensions are supported");
- ARM_COMPUTE_RETURN_ERROR_ON(attributes.axis() < static_cast<int32_t>(-src->num_dimensions()) || static_cast<int32_t>(src->num_dimensions()) <= attributes.axis());
+ ARM_COMPUTE_RETURN_ERROR_ON(attributes.axis() < static_cast<int32_t>(-src->num_dimensions()) ||
+ static_cast<int32_t>(src->num_dimensions()) <= attributes.axis());
// Auto initialize dst tensor info
TensorInfo dst_info_to_validate = *dst;
auto_init_if_empty(dst_info_to_validate, *src->clone());
- const size_t actual_axis = static_cast<size_t>(wrap_around(attributes.axis(), static_cast<int32_t>(src->num_dimensions())));
- const bool needs_permute = actual_axis != 0;
+ const size_t actual_axis =
+ static_cast<size_t>(wrap_around(attributes.axis(), static_cast<int32_t>(src->num_dimensions())));
+ const bool needs_permute = actual_axis != 0;
ARM_COMPUTE_RETURN_ERROR_ON_MSG(needs_permute, "Dynamic fusion softmax on axis!=0 not supported yet.");
// Perform fusion test and check if the operator meets the fusion constraints
@@ -128,17 +133,16 @@ Status GpuSoftmax::validate_op(const GpuWorkloadSketch &sketch,
return is_supported_op(*sketch.gpu_context(), src, &dst_info_to_validate, attributes);
}
-void GpuSoftmax::create_op(GpuWorkloadSketch &sketch,
- ITensorInfo *src,
- ITensorInfo *dst,
- const Attributes &attributes)
+void GpuSoftmax::create_op(GpuWorkloadSketch &sketch, ITensorInfo *src, ITensorInfo *dst, const Attributes &attributes)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
ARM_COMPUTE_LOG_PARAMS(src, dst, attributes);
TensorShape logits_sum_shape = src->tensor_shape();
- ITensorInfo *logits = sketch.implementation().create_auxiliary_tensor(src->clone()->set_id(ITensorInfo::invalid_tensor_id).set_tensor_shape(logits_sum_shape));
+ ITensorInfo *logits = sketch.implementation().create_auxiliary_tensor(
+ src->clone()->set_id(ITensorInfo::invalid_tensor_id).set_tensor_shape(logits_sum_shape));
logits_sum_shape.set(0, 1);
- ITensorInfo *sum = sketch.implementation().create_auxiliary_tensor(src->clone()->set_id(ITensorInfo::invalid_tensor_id).set_tensor_shape(logits_sum_shape));
+ ITensorInfo *sum = sketch.implementation().create_auxiliary_tensor(
+ src->clone()->set_id(ITensorInfo::invalid_tensor_id).set_tensor_shape(logits_sum_shape));
// Auto initialize dst tensor info and the auxiliary tensor infos as well
auto_init_if_empty(*dst, *src->clone());
@@ -151,7 +155,7 @@ void GpuSoftmax::create_op(GpuWorkloadSketch &sketch,
auto &comp_graph = sketch.implementation().component_graph();
const auto sketch_ctx = sketch.implementation().context();
- if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
+ if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
{
const auto cl_compile_ctx = sketch_ctx->cl_compile_context();
ARM_COMPUTE_UNUSED(cl_compile_ctx);
@@ -160,7 +164,7 @@ void GpuSoftmax::create_op(GpuWorkloadSketch &sketch,
// Add Direct Conv2d Component
{
auto properties = IGpuKernelComponent::Properties();
- properties.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ properties.stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
ArgumentPack<ITensorInfo> arguments_exp_sum;
ArgumentPack<ITensorInfo> arguments_norm;