author    ramelg01 <ramy.elgammal@arm.com>  2021-09-22 10:48:25 +0100
committer ramy.elgammal <ramy.elgammal@arm.com>  2021-09-28 21:52:49 +0000
commit    2e53f17f4f3c9179455c05d49a47a236067e00c0 (patch)
tree      bdd109231d87b82266917011db5fec769e1b158c
parent    a71711008dad9a786a66dcd734b19cb102d65ec5 (diff)
download  ComputeLibrary-2e53f17f4f3c9179455c05d49a47a236067e00c0.tar.gz
Provide logging for configure functions in all gpu operators
Partially Resolves: COMPMID-4718

Signed-off-by: Ramy Elgammal <ramy.elgammal@arm.com>
Change-Id: I3d80e732fc957114ec84ef8350dbf12eeae23054
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/6301
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Freddie Liardet <frederick.liardet@arm.com>
Reviewed-by: Jakub Jan Sujak <jakub.sujak@arm.com>
Reviewed-by: Giorgio Arena <giorgio.arena@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
-rw-r--r--  src/gpu/cl/operators/ClActivation.cpp | 2
-rw-r--r--  src/gpu/cl/operators/ClAdd.cpp | 3
-rw-r--r--  src/gpu/cl/operators/ClCast.cpp | 3
-rw-r--r--  src/gpu/cl/operators/ClConcatenate.cpp | 3
-rw-r--r--  src/gpu/cl/operators/ClConv2d.cpp | 3
-rw-r--r--  src/gpu/cl/operators/ClConvertFullyConnectedWeights.cpp | 3
-rw-r--r--  src/gpu/cl/operators/ClCopy.cpp | 3
-rw-r--r--  src/gpu/cl/operators/ClCrop.cpp | 3
-rw-r--r--  src/gpu/cl/operators/ClDequantize.cpp | 3
-rw-r--r--  src/gpu/cl/operators/ClDirectConv2d.cpp | 3
-rw-r--r--  src/gpu/cl/operators/ClElementwiseOperations.cpp | 3
-rw-r--r--  src/gpu/cl/operators/ClElementwiseUnary.cpp | 3
-rw-r--r--  src/gpu/cl/operators/ClFill.cpp | 3
-rw-r--r--  src/gpu/cl/operators/ClFlatten.cpp | 3
-rw-r--r--  src/gpu/cl/operators/ClFloor.cpp | 3
-rw-r--r--  src/gpu/cl/operators/ClFullyConnected.cpp | 2
-rw-r--r--  src/gpu/cl/operators/ClGemm.cpp | 3
-rw-r--r--  src/gpu/cl/operators/ClGemmConv2d.cpp | 3
-rw-r--r--  src/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.cpp | 2
-rw-r--r--  src/gpu/cl/operators/ClGemmLowpOutputStage.cpp | 3
-rw-r--r--  src/gpu/cl/operators/ClLogicalNot.cpp | 3
-rw-r--r--  src/gpu/cl/operators/ClMul.cpp | 3
-rw-r--r--  src/gpu/cl/operators/ClPRelu.cpp | 4
-rw-r--r--  src/gpu/cl/operators/ClPermute.cpp | 3
-rw-r--r--  src/gpu/cl/operators/ClPool2d.cpp | 4
-rw-r--r--  src/gpu/cl/operators/ClQuantize.cpp | 3
-rw-r--r--  src/gpu/cl/operators/ClReshape.cpp | 3
-rw-r--r--  src/gpu/cl/operators/ClScale.cpp | 4
-rw-r--r--  src/gpu/cl/operators/ClSoftmax.cpp | 3
-rw-r--r--  src/gpu/cl/operators/ClSub.cpp | 3
-rw-r--r--  src/gpu/cl/operators/ClTranspose.cpp | 3
-rw-r--r--  src/gpu/cl/operators/ClWinogradConv2d.cpp | 4
32 files changed, 96 insertions(+), 1 deletion(-)
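Every hunk below applies the same pattern: include src/common/utils/Log.h and call ARM_COMPUTE_LOG_PARAMS with the configure arguments before the kernel is created. A minimal sketch of that pattern follows; ClExampleOp and ClExampleKernel are hypothetical names used only for illustration and are not part of this patch.

#include "src/common/utils/Log.h" // provides ARM_COMPUTE_LOG_PARAMS

void ClExampleOp::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst)
{
    // Log the configure-time arguments before doing any work
    ARM_COMPUTE_LOG_PARAMS(src, dst);
    auto k = std::make_unique<kernels::ClExampleKernel>(); // hypothetical kernel type
    k->configure(compile_context, src, dst);
    _kernel = std::move(k);
}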
diff --git a/src/gpu/cl/operators/ClActivation.cpp b/src/gpu/cl/operators/ClActivation.cpp
index 6b36cc34b4..74a818d738 100644
--- a/src/gpu/cl/operators/ClActivation.cpp
+++ b/src/gpu/cl/operators/ClActivation.cpp
@@ -28,6 +28,7 @@
#include "src/common/IOperator.h"
#include "src/common/utils/LegacySupport.h"
+#include "src/common/utils/Log.h"
#include "src/gpu/cl/ClContext.h"
namespace arm_compute
@@ -36,6 +37,7 @@ namespace opencl
{
void ClActivation::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const ActivationLayerInfo &act_info)
{
+ ARM_COMPUTE_LOG_PARAMS(src, dst, act_info);
auto k = std::make_unique<kernels::ClActivationKernel>();
k->configure(compile_context, src, dst, act_info);
_kernel = std::move(k);
diff --git a/src/gpu/cl/operators/ClAdd.cpp b/src/gpu/cl/operators/ClAdd.cpp
index e1a013a6b5..b9bf505bba 100644
--- a/src/gpu/cl/operators/ClAdd.cpp
+++ b/src/gpu/cl/operators/ClAdd.cpp
@@ -26,6 +26,8 @@
#include "src/gpu/cl/ClCompileContext.h"
#include "src/gpu/cl/kernels/ClElementwiseKernel.h"
+#include "src/common/utils/Log.h"
+
namespace arm_compute
{
namespace opencl
@@ -33,6 +35,7 @@ namespace opencl
void ClAdd::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst,
ConvertPolicy policy, const ActivationLayerInfo &act_info)
{
+ ARM_COMPUTE_LOG_PARAMS(src1, src2, dst, policy, act_info);
auto k = std::make_unique<kernels::ClSaturatedArithmeticKernel>();
k->configure(compile_context, ArithmeticOperation::ADD, src1, src2, dst, policy, act_info);
_kernel = std::move(k);
diff --git a/src/gpu/cl/operators/ClCast.cpp b/src/gpu/cl/operators/ClCast.cpp
index 8911d208a7..05ea21b734 100644
--- a/src/gpu/cl/operators/ClCast.cpp
+++ b/src/gpu/cl/operators/ClCast.cpp
@@ -26,12 +26,15 @@
#include "src/gpu/cl/ClCompileContext.h"
#include "src/gpu/cl/kernels/ClCastKernel.h"
+#include "src/common/utils/Log.h"
+
namespace arm_compute
{
namespace opencl
{
void ClCast::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, ConvertPolicy policy)
{
+ ARM_COMPUTE_LOG_PARAMS(src, dst, policy);
auto k = std::make_unique<kernels::ClCastKernel>();
k->configure(compile_context, src, dst, policy);
_kernel = std::move(k);
diff --git a/src/gpu/cl/operators/ClConcatenate.cpp b/src/gpu/cl/operators/ClConcatenate.cpp
index 731d9b5054..a27fc37cc4 100644
--- a/src/gpu/cl/operators/ClConcatenate.cpp
+++ b/src/gpu/cl/operators/ClConcatenate.cpp
@@ -36,6 +36,8 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/TensorInfo.h"
#include "arm_compute/core/Types.h"
+
+#include "src/common/utils/Log.h"
#include "src/core/helpers/AutoConfiguration.h"
namespace arm_compute
@@ -45,6 +47,7 @@ namespace opencl
void ClConcatenate::configure(const CLCompileContext &compile_context, const std::vector<ITensorInfo *> &src_vector, ITensorInfo *dst, size_t axis)
{
ARM_COMPUTE_ERROR_ON(dst == nullptr);
+ ARM_COMPUTE_LOG_PARAMS(src_vector, dst, axis);
_axis = axis;
_num_inputs = src_vector.size();
diff --git a/src/gpu/cl/operators/ClConv2d.cpp b/src/gpu/cl/operators/ClConv2d.cpp
index c91a4831a8..7fe0de7a6f 100644
--- a/src/gpu/cl/operators/ClConv2d.cpp
+++ b/src/gpu/cl/operators/ClConv2d.cpp
@@ -34,6 +34,8 @@
#include "src/gpu/cl/operators/ClGemmConv2d.h"
#include "src/gpu/cl/operators/ClWinogradConv2d.h"
+#include "src/common/utils/Log.h"
+
#include <memory>
namespace
@@ -83,6 +85,7 @@ void ClConv2d::configure(const CLCompileContext &compile_context, ITensorInfo *s
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src, weights, dst);
ARM_COMPUTE_ERROR_THROW_ON(ClConv2d::validate(src, weights, ((biases != nullptr) ? biases : nullptr), dst, conv2d_info, weights_info));
+ ARM_COMPUTE_LOG_PARAMS(src, weights, biases, dst, conv2d_info, weights_info);
switch(ClConv2d::get_convolution_method(src, weights, dst, conv2d_info, weights_info, CLScheduler::get().target()))
{
diff --git a/src/gpu/cl/operators/ClConvertFullyConnectedWeights.cpp b/src/gpu/cl/operators/ClConvertFullyConnectedWeights.cpp
index 61e33f2fdb..08122b6852 100644
--- a/src/gpu/cl/operators/ClConvertFullyConnectedWeights.cpp
+++ b/src/gpu/cl/operators/ClConvertFullyConnectedWeights.cpp
@@ -26,12 +26,15 @@
#include "src/gpu/cl/ClCompileContext.h"
#include "src/gpu/cl/kernels/ClConvertFullyConnectedWeightsKernel.h"
+#include "src/common/utils/Log.h"
+
namespace arm_compute
{
namespace opencl
{
void ClConvertFullyConnectedWeights::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, const TensorShape &original_src_shape, DataLayout data_layout)
{
+ ARM_COMPUTE_LOG_PARAMS(src, dst, original_src_shape, data_layout);
auto k = std::make_unique<kernels::ClConvertFullyConnectedWeightsKernel>();
k->configure(compile_context, src, dst, original_src_shape, data_layout);
_kernel = std::move(k);
diff --git a/src/gpu/cl/operators/ClCopy.cpp b/src/gpu/cl/operators/ClCopy.cpp
index c1a9f264b6..d3b83040d0 100644
--- a/src/gpu/cl/operators/ClCopy.cpp
+++ b/src/gpu/cl/operators/ClCopy.cpp
@@ -26,12 +26,15 @@
#include "src/gpu/cl/ClCompileContext.h"
#include "src/gpu/cl/kernels/ClCopyKernel.h"
+#include "src/common/utils/Log.h"
+
namespace arm_compute
{
namespace opencl
{
void ClCopy::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, Window *dst_window)
{
+ ARM_COMPUTE_LOG_PARAMS(src, dst, dst_window);
auto k = std::make_unique<kernels::ClCopyKernel>();
k->configure(compile_context, src, dst, dst_window);
_kernel = std::move(k);
diff --git a/src/gpu/cl/operators/ClCrop.cpp b/src/gpu/cl/operators/ClCrop.cpp
index a6a1c8b103..cef9f14c7d 100644
--- a/src/gpu/cl/operators/ClCrop.cpp
+++ b/src/gpu/cl/operators/ClCrop.cpp
@@ -26,6 +26,8 @@
#include "src/gpu/cl/ClCompileContext.h"
#include "src/gpu/cl/kernels/ClCropKernel.h"
+#include "src/common/utils/Log.h"
+
namespace arm_compute
{
namespace opencl
@@ -33,6 +35,7 @@ namespace opencl
void ClCrop::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, Coordinates2D start, Coordinates2D end, uint32_t batch_index, float extrapolation_value,
Window *dst_window)
{
+ ARM_COMPUTE_LOG_PARAMS(src, dst, start, end, batch_index, extrapolation_value, dst_window);
auto k = std::make_unique<kernels::ClCropKernel>();
k->configure(compile_context, src, dst, start, end, batch_index, extrapolation_value, dst_window);
_kernel = std::move(k);
diff --git a/src/gpu/cl/operators/ClDequantize.cpp b/src/gpu/cl/operators/ClDequantize.cpp
index dbaa5f67df..0fccab63e0 100644
--- a/src/gpu/cl/operators/ClDequantize.cpp
+++ b/src/gpu/cl/operators/ClDequantize.cpp
@@ -28,12 +28,15 @@
#include "src/gpu/cl/ClCompileContext.h"
#include "src/gpu/cl/kernels/ClDequantizeKernel.h"
+#include "src/common/utils/Log.h"
+
namespace arm_compute
{
namespace opencl
{
void ClDequantize::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst)
{
+ ARM_COMPUTE_LOG_PARAMS(src, dst);
auto k = std::make_unique<kernels::ClDequantizeKernel>();
k->configure(compile_context, src, dst);
_kernel = std::move(k);
diff --git a/src/gpu/cl/operators/ClDirectConv2d.cpp b/src/gpu/cl/operators/ClDirectConv2d.cpp
index 50e63beedc..066959f400 100644
--- a/src/gpu/cl/operators/ClDirectConv2d.cpp
+++ b/src/gpu/cl/operators/ClDirectConv2d.cpp
@@ -29,6 +29,8 @@
#include "src/gpu/cl/kernels/ClActivationKernel.h"
#include "src/gpu/cl/kernels/ClDirectConv2dKernel.h"
+#include "src/common/utils/Log.h"
+
namespace arm_compute
{
namespace opencl
@@ -48,6 +50,7 @@ void ClDirectConv2d::configure(const CLCompileContext &compile_context, ITensorI
const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src);
+ ARM_COMPUTE_LOG_PARAMS(src, weights, biases, dst, conv_info, act_info);
// Configure direct convolution kernel
const ActivationLayerInfo conv2d_act_info = (src->data_layout() == DataLayout::NHWC && is_data_type_float(src->data_type())) ? act_info : ActivationLayerInfo();
diff --git a/src/gpu/cl/operators/ClElementwiseOperations.cpp b/src/gpu/cl/operators/ClElementwiseOperations.cpp
index 4e4cd5ae9d..2525041d2b 100644
--- a/src/gpu/cl/operators/ClElementwiseOperations.cpp
+++ b/src/gpu/cl/operators/ClElementwiseOperations.cpp
@@ -25,12 +25,15 @@
#include "src/gpu/cl/kernels/ClElementwiseKernel.h"
+#include "src/common/utils/Log.h"
+
namespace arm_compute
{
namespace opencl
{
void ClElementwiseDivision::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, const ActivationLayerInfo &act_info)
{
+ ARM_COMPUTE_LOG_PARAMS(src1, src2, dst, act_info);
auto k = std::make_unique<kernels::ClArithmeticKernel>();
k->configure(compile_context, ArithmeticOperation::DIV, src1, src2, dst, act_info);
_kernel = std::move(k);
diff --git a/src/gpu/cl/operators/ClElementwiseUnary.cpp b/src/gpu/cl/operators/ClElementwiseUnary.cpp
index 24a603e8c3..270769b3de 100644
--- a/src/gpu/cl/operators/ClElementwiseUnary.cpp
+++ b/src/gpu/cl/operators/ClElementwiseUnary.cpp
@@ -25,12 +25,15 @@
#include "src/gpu/cl/kernels/ClElementwiseUnaryKernel.h"
+#include "src/common/utils/Log.h"
+
namespace arm_compute
{
namespace opencl
{
void ClRsqrt::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
{
+ ARM_COMPUTE_LOG_PARAMS(src, dst);
auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>();
k->configure(compile_context, src, dst, ElementWiseUnary::RSQRT);
_kernel = std::move(k);
diff --git a/src/gpu/cl/operators/ClFill.cpp b/src/gpu/cl/operators/ClFill.cpp
index 9e006c1649..ad22b15cff 100644
--- a/src/gpu/cl/operators/ClFill.cpp
+++ b/src/gpu/cl/operators/ClFill.cpp
@@ -26,12 +26,15 @@
#include "src/gpu/cl/ClCompileContext.h"
#include "src/gpu/cl/kernels/ClFillKernel.h"
+#include "src/common/utils/Log.h"
+
namespace arm_compute
{
namespace opencl
{
void ClFill::configure(const ClCompileContext &compile_context, ITensorInfo *tensor, const PixelValue &constant_value, Window *dst_window)
{
+ ARM_COMPUTE_LOG_PARAMS(tensor, constant_value, dst_window);
auto k = std::make_unique<kernels::ClFillKernel>();
k->configure(compile_context, tensor, constant_value, dst_window);
_kernel = std::move(k);
diff --git a/src/gpu/cl/operators/ClFlatten.cpp b/src/gpu/cl/operators/ClFlatten.cpp
index 3283454fd6..e277c0d7e4 100644
--- a/src/gpu/cl/operators/ClFlatten.cpp
+++ b/src/gpu/cl/operators/ClFlatten.cpp
@@ -26,12 +26,15 @@
#include "src/gpu/cl/ClCompileContext.h"
#include "src/gpu/cl/kernels/ClReshapeKernel.h"
+#include "src/common/utils/Log.h"
+
namespace arm_compute
{
namespace opencl
{
void ClFlatten::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
{
+ ARM_COMPUTE_LOG_PARAMS(src, dst);
auto k = std::make_unique<kernels::ClReshapeKernel>();
k->configure(compile_context, src, dst);
_kernel = std::move(k);
diff --git a/src/gpu/cl/operators/ClFloor.cpp b/src/gpu/cl/operators/ClFloor.cpp
index 866bff2fad..84f685e381 100644
--- a/src/gpu/cl/operators/ClFloor.cpp
+++ b/src/gpu/cl/operators/ClFloor.cpp
@@ -26,12 +26,15 @@
#include "src/gpu/cl/ClCompileContext.h"
#include "src/gpu/cl/kernels/ClFloorKernel.h"
+#include "src/common/utils/Log.h"
+
namespace arm_compute
{
namespace opencl
{
void ClFloor::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
{
+ ARM_COMPUTE_LOG_PARAMS(src, dst);
auto k = std::make_unique<kernels::ClFloorKernel>();
k->configure(compile_context, src, dst);
_kernel = std::move(k);
diff --git a/src/gpu/cl/operators/ClFullyConnected.cpp b/src/gpu/cl/operators/ClFullyConnected.cpp
index 8b7e336c9f..165ffe9a47 100644
--- a/src/gpu/cl/operators/ClFullyConnected.cpp
+++ b/src/gpu/cl/operators/ClFullyConnected.cpp
@@ -38,6 +38,7 @@
#include "src/gpu/cl/operators/ClTranspose.h"
#include "src/gpu/cl/utils/ClAuxTensorHandler.h"
+#include "src/common/utils/Log.h"
#include "support/Cast.h"
#include <algorithm>
@@ -231,6 +232,7 @@ void ClFullyConnected::configure(const CLCompileContext &compile_context, ITenso
// Perform validate step
ARM_COMPUTE_ERROR_THROW_ON(ClFullyConnected::validate(src, weights, biases, dst, fc_info));
+ ARM_COMPUTE_LOG_PARAMS(src, weights, biases, dst, fc_info);
_are_weights_converted = true;
_are_weights_reshaped = fc_info.transpose_weights ? fc_info.are_weights_reshaped : true;
diff --git a/src/gpu/cl/operators/ClGemm.cpp b/src/gpu/cl/operators/ClGemm.cpp
index 625c057cf4..e955ae3d65 100644
--- a/src/gpu/cl/operators/ClGemm.cpp
+++ b/src/gpu/cl/operators/ClGemm.cpp
@@ -38,7 +38,6 @@
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "arm_compute/runtime/ITensorAllocator.h"
-#include "src/common/utils/Log.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/core/helpers/MemoryHelpers.h"
#include "src/core/utils/helpers/float_ops.h"
@@ -47,6 +46,7 @@
#include "src/runtime/CL/gemm/CLGEMMKernelSelection.h"
#include "src/runtime/CL/gemm_auto_heuristics/CLGEMMAutoHeuristics.h"
+#include "src/common/utils/Log.h"
#include "support/Cast.h"
#include "utils/TypePrinter.h"
@@ -561,6 +561,7 @@ void ClGemm::configure(const CLCompileContext &compile_context, ITensorInfo *a,
// Perform validation step
ARM_COMPUTE_ERROR_THROW_ON(validate(a, b, c, output, alpha, beta, gemm_info));
+ ARM_COMPUTE_LOG_PARAMS(a, b, c, output, alpha, beta, gemm_info);
// Check if we need to reshape the matrix B only on the first run
_reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run();
diff --git a/src/gpu/cl/operators/ClGemmConv2d.cpp b/src/gpu/cl/operators/ClGemmConv2d.cpp
index 0f625bc56a..785f1f1c9c 100644
--- a/src/gpu/cl/operators/ClGemmConv2d.cpp
+++ b/src/gpu/cl/operators/ClGemmConv2d.cpp
@@ -41,6 +41,8 @@
#include "src/gpu/cl/operators/ClGemm.h"
#include "src/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.h"
#include "src/gpu/cl/utils/ClAuxTensorHandler.h"
+
+#include "src/common/utils/Log.h"
#include "support/Cast.h"
namespace arm_compute
@@ -159,6 +161,7 @@ void ClGemmConv2d::configure(const CLCompileContext &compile_context, ITensorInf
ARM_COMPUTE_ERROR_THROW_ON(ClGemmConv2d::validate(src, weights, biases, dst,
conv2d_info,
weights_info));
+ ARM_COMPUTE_LOG_PARAMS(src, weights, biases, dst, conv2d_info, weights_info);
const DataType data_type = src->data_type();
const DataLayout data_layout = src->data_layout();
diff --git a/src/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.cpp b/src/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.cpp
index f3c0ee1c8f..6fd7e52d5d 100644
--- a/src/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.cpp
+++ b/src/gpu/cl/operators/ClGemmLowpMatrixMultiplyCore.cpp
@@ -47,6 +47,7 @@
#include "src/gpu/cl/utils/ClAuxTensorHandler.h"
#include "src/runtime/CL/gemm_auto_heuristics/CLGEMMAutoHeuristics.h"
+#include "src/common/utils/Log.h"
#include "utils/TypePrinter.h"
namespace arm_compute
@@ -218,6 +219,7 @@ void ClGemmLowpMatrixMultiplyCore::configure(const CLCompileContext &compile_con
{
ARM_COMPUTE_ERROR_ON_NULLPTR(a, b, output);
ARM_COMPUTE_ERROR_THROW_ON(ClGemmLowpMatrixMultiplyCore::validate(a, b, c != nullptr ? c : nullptr, output, gemm_info));
+ ARM_COMPUTE_LOG_PARAMS(a, b, c, output, gemm_info);
_reshape_b_only_on_first_run = gemm_info.reshape_b_only_on_first_run();
_a_offset = a->quantization_info().uniform().offset;
diff --git a/src/gpu/cl/operators/ClGemmLowpOutputStage.cpp b/src/gpu/cl/operators/ClGemmLowpOutputStage.cpp
index 27fb89217c..a61b11a3b1 100644
--- a/src/gpu/cl/operators/ClGemmLowpOutputStage.cpp
+++ b/src/gpu/cl/operators/ClGemmLowpOutputStage.cpp
@@ -31,6 +31,8 @@
#include "src/gpu/cl/kernels/ClGemmLowpQuantizeDownInt32ScaleByFloatKernel.h"
#include "src/gpu/cl/kernels/ClGemmLowpQuantizeDownInt32ScaleKernel.h"
+#include "src/common/utils/Log.h"
+
namespace arm_compute
{
namespace opencl
@@ -38,6 +40,7 @@ namespace opencl
void ClGemmLowpOutputStage::configure(const CLCompileContext &compile_context, const ITensorInfo *src, const ITensorInfo *bias, ITensorInfo *dst, const GEMMLowpOutputStageInfo &info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
+ ARM_COMPUTE_LOG_PARAMS(src, bias, dst, info);
switch(info.type)
{
diff --git a/src/gpu/cl/operators/ClLogicalNot.cpp b/src/gpu/cl/operators/ClLogicalNot.cpp
index b909066e4c..b2eb89b320 100644
--- a/src/gpu/cl/operators/ClLogicalNot.cpp
+++ b/src/gpu/cl/operators/ClLogicalNot.cpp
@@ -26,12 +26,15 @@
#include "src/gpu/cl/ClCompileContext.h"
#include "src/gpu/cl/kernels/ClElementwiseUnaryKernel.h"
+#include "src/common/utils/Log.h"
+
namespace arm_compute
{
namespace opencl
{
void ClLogicalNot::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
{
+ ARM_COMPUTE_LOG_PARAMS(src, dst);
auto k = std::make_unique<kernels::ClElementWiseUnaryKernel>();
k->configure(compile_context, src, dst, ElementWiseUnary::LOGICAL_NOT);
_kernel = std::move(k);
diff --git a/src/gpu/cl/operators/ClMul.cpp b/src/gpu/cl/operators/ClMul.cpp
index 59d2b96bee..2066f0cfaa 100644
--- a/src/gpu/cl/operators/ClMul.cpp
+++ b/src/gpu/cl/operators/ClMul.cpp
@@ -27,6 +27,8 @@
#include "src/gpu/cl/ClCompileContext.h"
#include "src/gpu/cl/kernels/ClMulKernel.h"
+#include "src/common/utils/Log.h"
+
namespace arm_compute
{
namespace opencl
@@ -34,6 +36,7 @@ namespace opencl
void ClMul::configure(const CLCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst, float scale,
ConvertPolicy overflow_policy, RoundingPolicy rounding_policy, const ActivationLayerInfo &act_info)
{
+ ARM_COMPUTE_LOG_PARAMS(src1, src2, dst, scale, overflow_policy, rounding_policy, act_info);
auto k = std::make_unique<kernels::ClMulKernel>();
k->configure(compile_context, src1, src2, dst, scale, overflow_policy, rounding_policy, act_info);
_kernel = std::move(k);
diff --git a/src/gpu/cl/operators/ClPRelu.cpp b/src/gpu/cl/operators/ClPRelu.cpp
index 05717d5bb7..cf4ebe6083 100644
--- a/src/gpu/cl/operators/ClPRelu.cpp
+++ b/src/gpu/cl/operators/ClPRelu.cpp
@@ -22,8 +22,11 @@
* SOFTWARE.
*/
#include "src/gpu/cl/operators/ClPRelu.h"
+
#include "src/gpu/cl/kernels/ClElementwiseKernel.h"
+#include "src/common/utils/Log.h"
+
namespace arm_compute
{
namespace opencl
@@ -31,6 +34,7 @@ namespace opencl
using KernelType = kernels::ClArithmeticKernel;
void ClPRelu::configure(const CLCompileContext &compile_context, ITensorInfo *input, ITensorInfo *alpha, ITensorInfo *output)
{
+ ARM_COMPUTE_LOG_PARAMS(input, alpha, output);
auto k = std::make_unique<KernelType>();
k->configure(compile_context, ArithmeticOperation::PRELU, input, alpha, (output == nullptr ? input : output));
_kernel = std::move(k);
diff --git a/src/gpu/cl/operators/ClPermute.cpp b/src/gpu/cl/operators/ClPermute.cpp
index ed74e22b6c..ed56f97bfe 100644
--- a/src/gpu/cl/operators/ClPermute.cpp
+++ b/src/gpu/cl/operators/ClPermute.cpp
@@ -26,12 +26,15 @@
#include "src/gpu/cl/ClCompileContext.h"
#include "src/gpu/cl/kernels/ClPermuteKernel.h"
+#include "src/common/utils/Log.h"
+
namespace arm_compute
{
namespace opencl
{
void ClPermute::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst, const PermutationVector &perm)
{
+ ARM_COMPUTE_LOG_PARAMS(src, dst, perm);
auto k = std::make_unique<kernels::ClPermuteKernel>();
k->configure(compile_context, src, dst, perm);
_kernel = std::move(k);
diff --git a/src/gpu/cl/operators/ClPool2d.cpp b/src/gpu/cl/operators/ClPool2d.cpp
index a5b18a2340..3da90b8ced 100644
--- a/src/gpu/cl/operators/ClPool2d.cpp
+++ b/src/gpu/cl/operators/ClPool2d.cpp
@@ -28,6 +28,8 @@
#include "src/gpu/cl/ClCompileContext.h"
#include "src/gpu/cl/kernels/ClPool2dKernel.h"
+#include "src/common/utils/Log.h"
+
namespace arm_compute
{
namespace opencl
@@ -35,6 +37,8 @@ namespace opencl
void ClPool2d::configure(const ClCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const PoolingLayerInfo &info, ITensorInfo *indices)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src);
+ ARM_COMPUTE_LOG_PARAMS(src, dst, info, indices);
+
// Configure pooling kernel
auto k = std::make_unique<kernels::ClPool2dKernel>();
k->set_target(CLScheduler::get().target());
diff --git a/src/gpu/cl/operators/ClQuantize.cpp b/src/gpu/cl/operators/ClQuantize.cpp
index 915e0fdef0..47ae5cea47 100644
--- a/src/gpu/cl/operators/ClQuantize.cpp
+++ b/src/gpu/cl/operators/ClQuantize.cpp
@@ -28,12 +28,15 @@
#include "src/gpu/cl/ClCompileContext.h"
#include "src/gpu/cl/kernels/ClQuantizeKernel.h"
+#include "src/common/utils/Log.h"
+
namespace arm_compute
{
namespace opencl
{
void ClQuantize::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst)
{
+ ARM_COMPUTE_LOG_PARAMS(src, dst);
auto k = std::make_unique<kernels::ClQuantizeKernel>();
k->configure(compile_context, src, dst);
_kernel = std::move(k);
diff --git a/src/gpu/cl/operators/ClReshape.cpp b/src/gpu/cl/operators/ClReshape.cpp
index 2c1d1817d1..560966f4fc 100644
--- a/src/gpu/cl/operators/ClReshape.cpp
+++ b/src/gpu/cl/operators/ClReshape.cpp
@@ -26,12 +26,15 @@
#include "src/gpu/cl/ClCompileContext.h"
#include "src/gpu/cl/kernels/ClReshapeKernel.h"
+#include "src/common/utils/Log.h"
+
namespace arm_compute
{
namespace opencl
{
void ClReshape::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
{
+ ARM_COMPUTE_LOG_PARAMS(src, dst);
auto k = std::make_unique<kernels::ClReshapeKernel>();
k->configure(compile_context, src, dst);
_kernel = std::move(k);
diff --git a/src/gpu/cl/operators/ClScale.cpp b/src/gpu/cl/operators/ClScale.cpp
index 6dab66786a..0798b19ca0 100644
--- a/src/gpu/cl/operators/ClScale.cpp
+++ b/src/gpu/cl/operators/ClScale.cpp
@@ -29,6 +29,8 @@
#include "src/gpu/cl/ClCompileContext.h"
#include "src/gpu/cl/kernels/ClScaleKernel.h"
+#include "src/common/utils/Log.h"
+
namespace arm_compute
{
namespace opencl
@@ -36,6 +38,8 @@ namespace opencl
void ClScale::configure(const CLCompileContext &compile_context, ITensorInfo *src, ITensorInfo *dst, const ScaleKernelInfo &info)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src);
+ ARM_COMPUTE_LOG_PARAMS(src, dst, info);
+
// Configure Scale kernel
auto k = std::make_unique<kernels::ClScaleKernel>();
k->set_target(CLScheduler::get().target());
diff --git a/src/gpu/cl/operators/ClSoftmax.cpp b/src/gpu/cl/operators/ClSoftmax.cpp
index 6b728f5354..03809553a3 100644
--- a/src/gpu/cl/operators/ClSoftmax.cpp
+++ b/src/gpu/cl/operators/ClSoftmax.cpp
@@ -30,6 +30,8 @@
#include "src/gpu/cl/utils/ClAuxTensorHandler.h"
#include "support/Cast.h"
+#include "src/common/utils/Log.h"
+
using namespace arm_compute::experimental;
namespace arm_compute
@@ -53,6 +55,7 @@ ClSoftmax::ClSoftmax()
void ClSoftmax::configure(const CLCompileContext &compile_context, const ITensorInfo &src, ITensorInfo &dst, const SoftmaxKernelInfo &info)
{
ARM_COMPUTE_ERROR_THROW_ON(validate(src, dst, info));
+ ARM_COMPUTE_LOG_PARAMS(src, dst, info);
const size_t actual_axis = static_cast<size_t>(wrap_around(info.axis, static_cast<int32_t>(src.num_dimensions())));
diff --git a/src/gpu/cl/operators/ClSub.cpp b/src/gpu/cl/operators/ClSub.cpp
index b94fef3cf9..53be04a70f 100644
--- a/src/gpu/cl/operators/ClSub.cpp
+++ b/src/gpu/cl/operators/ClSub.cpp
@@ -26,6 +26,8 @@
#include "src/gpu/cl/ClCompileContext.h"
#include "src/gpu/cl/kernels/ClElementwiseKernel.h"
+#include "src/common/utils/Log.h"
+
namespace arm_compute
{
namespace opencl
@@ -33,6 +35,7 @@ namespace opencl
void ClSub::configure(const ClCompileContext &compile_context, ITensorInfo *src1, ITensorInfo *src2, ITensorInfo *dst,
ConvertPolicy policy, const ActivationLayerInfo &act_info)
{
+ ARM_COMPUTE_LOG_PARAMS(src1, src2, dst, policy, act_info);
auto k = std::make_unique<kernels::ClSaturatedArithmeticKernel>();
k->configure(compile_context, ArithmeticOperation::SUB, src1, src2, dst, policy, act_info);
_kernel = std::move(k);
diff --git a/src/gpu/cl/operators/ClTranspose.cpp b/src/gpu/cl/operators/ClTranspose.cpp
index 6429451a42..26feffe2b9 100644
--- a/src/gpu/cl/operators/ClTranspose.cpp
+++ b/src/gpu/cl/operators/ClTranspose.cpp
@@ -26,12 +26,15 @@
#include "src/gpu/cl/ClCompileContext.h"
#include "src/gpu/cl/kernels/ClTransposeKernel.h"
+#include "src/common/utils/Log.h"
+
namespace arm_compute
{
namespace opencl
{
void ClTranspose::configure(const ClCompileContext &compile_context, const ITensorInfo *src, ITensorInfo *dst)
{
+ ARM_COMPUTE_LOG_PARAMS(src, dst);
auto k = std::make_unique<kernels::ClTransposeKernel>();
k->configure(compile_context, src, dst);
_kernel = std::move(k);
diff --git a/src/gpu/cl/operators/ClWinogradConv2d.cpp b/src/gpu/cl/operators/ClWinogradConv2d.cpp
index fbf6442a80..ffa1effc74 100644
--- a/src/gpu/cl/operators/ClWinogradConv2d.cpp
+++ b/src/gpu/cl/operators/ClWinogradConv2d.cpp
@@ -36,6 +36,8 @@
#include "src/gpu/cl/kernels/ClWinogradInputTransformKernel.h"
#include "src/gpu/cl/kernels/ClWinogradOutputTransformKernel.h"
#include "src/gpu/cl/utils/ClAuxTensorHandler.h"
+
+#include "src/common/utils/Log.h"
#include "support/Cast.h"
using namespace arm_compute::experimental;
@@ -173,6 +175,8 @@ void ClWinogradConv2d::configure(const ClCompileContext &compile_context, ITenso
const PadStrideInfo &conv_info, const ActivationLayerInfo &act_info, bool enable_fast_math)
{
ARM_COMPUTE_ERROR_THROW_ON(validate_arguments(src, weights, biases, dst, conv_info, act_info, enable_fast_math));
+ ARM_COMPUTE_LOG_PARAMS(src, weights, biases, dst, conv_info, act_info, enable_fast_math);
+
// Get indices for the width and height
const size_t idx_width = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::WIDTH);
const size_t idx_height = get_data_layout_dimension_index(src->data_layout(), DataLayoutDimension::HEIGHT);