author    Felix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com>  2023-09-27 17:46:17 +0100
committer felixjohnny.thomasmathibalan <felixjohnny.thomasmathibalan@arm.com>  2023-09-28 12:08:05 +0000
commit    afd38f0c617d6f89b2b4532c6c44f116617e2b6f (patch)
tree      03bc7d5a762099989b16a656fa8d397b490ed70e /src/dynamic_fusion
parent    bdcb4c148ee2fdeaaddf4cf1e57bbb0de02bb894 (diff)
download  ComputeLibrary-afd38f0c617d6f89b2b4532c6c44f116617e2b6f.tar.gz
Apply clang-format on repository
Code is formatted as per a revised clang-format configuration file (not part of this delivery). Version 14.0.6 is used.

Exclusion List:
- files with .cl extension
- files that are not strictly C/C++ (e.g. Android.bp, Sconscript ...)

And the following directories:
- compute_kernel_writer/validation/
- tests/
- include/
- src/core/NEON/kernels/convolution/
- src/core/NEON/kernels/arm_gemm/
- src/core/NEON/kernels/arm_conv/
- data/

There will be a follow-up for formatting of .cl files and the files under tests/ and compute_kernel_writer/validation/.

Signed-off-by: Felix Thomasmathibalan <felixjohnny.thomasmathibalan@arm.com>
Change-Id: Ib7eb1fcf4e7537b9feaefcfc15098a804a3fde0a
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10391
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
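Note: the revised .clang-format file itself is not part of this delivery. The block below is a minimal sketch of a configuration consistent with the formatting visible in the hunks that follow, written with clang-format 14 option names; every value here is inferred from the diff, not taken from the shipped file.

---
Language:                        Cpp
BasedOnStyle:                    LLVM
ColumnLimit:                     120    # wrapped calls and conditions below break near 120 columns
IndentWidth:                     4
AccessModifierOffset:            -4     # public:/private: sit flush with the class keyword
BreakBeforeBraces:               Allman # braces keep their own line throughout the diff
SpaceBeforeParens:               ControlStatements # switch( -> switch (, for( -> for (, if( -> if (
PointerAlignment:                Right  # e.g. "const ICLTensor *tensor"
AlignConsecutiveAssignments:     true   # aligned '=' runs, e.g. the DataView members in ClWorkloadRuntime.cpp
AlignConsecutiveDeclarations:    true   # aligned member declarations, e.g. in GpuKernelComponentGroup.h
Cpp11BracedListStyle:            true   # "{ tensor }" becomes "{tensor}"
BinPackParameters:               false  # wrapped declarations place one parameter per line
AllowShortLambdasOnASingleLine:  All    # [](const auto &t) { return t->id(); } stays on one line
SortIncludes:                    CaseInsensitive # include reordering in GpuLogicalKernel.cpp
IncludeBlocks:                   Regroup         # blank lines inserted between include groups

Under the same assumption, formatting would have been applied in place with clang-format 14.0.6 (e.g. clang-format -i) over the non-excluded .cpp/.h sources; the exact invocation is not recorded in this commit.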
Diffstat (limited to 'src/dynamic_fusion')
-rw-r--r-- src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp | 61
-rw-r--r-- src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h | 11
-rw-r--r-- src/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.cpp | 81
-rw-r--r-- src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.cpp | 7
-rw-r--r-- src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.h | 5
-rw-r--r-- src/dynamic_fusion/sketch/ArgumentPack.h | 45
-rw-r--r-- src/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.cpp | 3
-rw-r--r-- src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp | 1
-rw-r--r-- src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h | 15
-rw-r--r-- src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp | 20
-rw-r--r-- src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h | 18
-rw-r--r-- src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp | 105
-rw-r--r-- src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h | 23
-rw-r--r-- src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp | 10
-rw-r--r-- src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h | 5
-rw-r--r-- src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h | 1
-rw-r--r-- src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp | 10
-rw-r--r-- src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.cpp | 48
-rw-r--r-- src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.h | 4
-rw-r--r-- src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp | 16
-rw-r--r-- src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h | 6
-rw-r--r-- src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp | 4
-rw-r--r-- src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h | 11
-rw-r--r-- src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h | 56
-rw-r--r-- src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h | 1
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp | 7
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h | 6
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp | 21
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h | 4
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.cpp | 12
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp | 1
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h | 2
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp | 20
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h | 8
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h | 10
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp | 34
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.h | 10
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp | 44
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h | 10
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.cpp | 49
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp | 84
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h | 14
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.cpp | 171
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.h | 8
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwResize.cpp | 76
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp | 10
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h | 6
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h | 31
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h | 35
-rw-r--r-- src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/ElementwiseBinary.h | 3
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/GpuKernelComponentFactory.h | 7
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h | 15
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp | 12
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h | 18
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp | 30
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h | 10
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp | 57
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h | 34
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp | 64
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h | 26
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp | 65
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h | 12
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp | 14
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h | 7
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp | 14
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h | 7
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp | 49
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h | 21
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp | 12
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h | 5
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp | 14
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h | 13
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp | 20
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h | 5
-rw-r--r-- src/dynamic_fusion/sketch/gpu/components/utils/type_printer/ElementwiseBinary.h | 22
-rw-r--r-- src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp | 19
-rw-r--r-- src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp | 52
-rw-r--r-- src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp | 44
-rw-r--r-- src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp | 65
-rw-r--r-- src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp | 90
-rw-r--r-- src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp | 13
-rw-r--r-- src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp | 19
-rw-r--r-- src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp | 43
-rw-r--r-- src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp | 27
-rw-r--r-- src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp | 40
-rw-r--r-- src/dynamic_fusion/sketch/gpu/operators/GpuSigmoid.cpp | 31
-rw-r--r-- src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp | 38
-rw-r--r-- src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp | 19
-rw-r--r-- src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp | 35
-rw-r--r-- src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp | 19
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp | 36
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h | 17
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h | 8
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp | 26
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h | 1
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp | 30
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp | 81
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h | 1
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp | 112
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h | 1
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp | 94
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h | 5
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp | 57
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h | 4
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp | 35
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp | 92
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h | 1
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp | 28
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h | 4
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp | 56
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp | 16
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h | 1
-rw-r--r-- src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp | 59
-rw-r--r-- src/dynamic_fusion/sketch/utils/DependencyGraph.h | 182
-rw-r--r-- src/dynamic_fusion/utils/Utils.h | 16
115 files changed, 1637 insertions, 1676 deletions
diff --git a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp
index 15a5632d0b..9ca20fa152 100644
--- a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp
+++ b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.cpp
@@ -22,14 +22,15 @@
* SOFTWARE.
*/
#include "ClKernelRuntime.h"
+
#include "arm_compute/core/CL/ICLTensor.h"
+
#include "src/core/CL/CLUtils.h"
#ifdef ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.h"
#endif // ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h"
#include "src/gpu/cl/ClKernelLibrary.h"
-
#include "support/Cast.h"
namespace arm_compute
{
@@ -43,13 +44,12 @@ void ClKernelRuntime::configure(const ClCompileContext &compile_ctx, const GpuKe
{
// Create kernel from kernel source string
opencl::ClKernelLibrary &klib = opencl::ClKernelLibrary::get();
- _kernel = static_cast<cl::Kernel>(compile_ctx.create_kernel(code.name(),
- code.name(), // program name has to be provided to differentiate between different unfusable components' kernels.
- // Each program contains exactly one kernel
- code.code(),
- klib.kernel_path() /* Kernel path: Used in cases of embedded kernels */,
- code.build_options().options(),
- false /* Is source binary */));
+ _kernel = static_cast<cl::Kernel>(compile_ctx.create_kernel(
+ code.name(),
+ code.name(), // program name has to be provided to differentiate between different unfusable components' kernels.
+ // Each program contains exactly one kernel
+ code.code(), klib.kernel_path() /* Kernel path: Used in cases of embedded kernels */,
+ code.build_options().options(), false /* Is source binary */));
// Configure execution window
IClKernel::configure_internal(code.window());
@@ -63,11 +63,15 @@ void ClKernelRuntime::configure(const ClCompileContext &compile_ctx, const GpuKe
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
-inline void ClKernelRuntime::add_tensor_argument(unsigned int &idx, const GpuKernelArgumentInfo &arg, const ICLTensor *tensor, const Window &arg_slice, std::vector<cl::Image2D> &cl_images)
+inline void ClKernelRuntime::add_tensor_argument(unsigned int &idx,
+ const GpuKernelArgumentInfo &arg,
+ const ICLTensor *tensor,
+ const Window &arg_slice,
+ std::vector<cl::Image2D> &cl_images)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(tensor);
- switch(arg.type)
+ switch (arg.type)
{
case GpuKernelArgumentInfo::Type::Scalar:
{
@@ -95,9 +99,13 @@ inline void ClKernelRuntime::add_tensor_argument(unsigned int &idx, const GpuKer
}
case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D:
{
- const TensorShape shape2d(tensor->info()->dimension(0) / 4, tensor->info()->dimension(1) * tensor->info()->dimension(2) * tensor->info()->dimension(3));
+ const TensorShape shape2d(tensor->info()->dimension(0) / 4, tensor->info()->dimension(1) *
+ tensor->info()->dimension(2) *
+ tensor->info()->dimension(3));
const size_t image_row_pitch = tensor->info()->strides_in_bytes()[1];
- cl::Image2D tensor_image2d = create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(), shape2d, tensor->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly);
+ cl::Image2D tensor_image2d =
+ create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(), shape2d,
+ tensor->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly);
cl_images.push_back(tensor_image2d);
_kernel.setArg(idx++, tensor_image2d);
break;
@@ -111,9 +119,13 @@ inline void ClKernelRuntime::add_tensor_argument(unsigned int &idx, const GpuKer
}
case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:
{
- const TensorShape shape2d(tensor->info()->dimension(0) / 4, tensor->info()->dimension(1) * tensor->info()->dimension(2) * tensor->info()->dimension(3));
+ const TensorShape shape2d(tensor->info()->dimension(0) / 4, tensor->info()->dimension(1) *
+ tensor->info()->dimension(2) *
+ tensor->info()->dimension(3));
const size_t image_row_pitch = tensor->info()->strides_in_bytes()[1];
- cl::Image2D tensor_image2d = create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(), shape2d, tensor->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly);
+ cl::Image2D tensor_image2d =
+ create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(), shape2d,
+ tensor->info()->data_type(), image_row_pitch, CLImage2DType::ReadOnly);
cl_images.push_back(tensor_image2d);
_kernel.setArg(idx++, tensor_image2d);
_kernel.setArg<cl_uint>(idx++, static_cast<unsigned int>(tensor->info()->strides_in_bytes()[2]));
@@ -142,8 +154,9 @@ inline void ClKernelRuntime::add_tensor_argument(unsigned int &idx, const GpuKer
const size_t image_h = tensor->info()->tensor_shape().total_size_upper(1);
const size_t image_stride_y = tensor->info()->strides_in_bytes()[1];
- cl::Image2D tensor_image2d = create_image2d_from_buffer(CLKernelLibrary::get().context(), tensor->cl_buffer(),
- TensorShape(image_w, image_h), tensor->info()->data_type(), image_stride_y, CLImage2DType::ReadOnly);
+ cl::Image2D tensor_image2d = create_image2d_from_buffer(
+ CLKernelLibrary::get().context(), tensor->cl_buffer(), TensorShape(image_w, image_h),
+ tensor->info()->data_type(), image_stride_y, CLImage2DType::ReadOnly);
cl_images.push_back(tensor_image2d);
_kernel.setArg(idx++, tensor_image2d);
@@ -170,13 +183,16 @@ inline void ClKernelRuntime::add_tensor_argument(unsigned int &idx, const GpuKer
}
#else // ACL_INTERNAL_TEST_CKW_IN_DF
-inline void ClKernelRuntime::add_kernel_argument(unsigned int &idx, const GpuKernelArgumentBinding &arg, const ICLTensor *tensor, std::vector<cl::Image2D> &cl_images)
+inline void ClKernelRuntime::add_kernel_argument(unsigned int &idx,
+ const GpuKernelArgumentBinding &arg,
+ const ICLTensor *tensor,
+ std::vector<cl::Image2D> &cl_images)
{
- switch(arg.type())
+ switch (arg.type())
{
case GpuKernelArgumentBinding::Type::TensorStorage:
{
- switch(arg.tensor_storage_type())
+ switch (arg.tensor_storage_type())
{
case TensorStorageType::ClBufferUint8Ptr:
{
@@ -238,7 +254,7 @@ void ClKernelRuntime::run_op(ITensorPack &tensors, const Window &window, cl::Com
// CLImages created from tensor arguments. Need to be retained until enqueue
std::vector<cl::Image2D> cl_images;
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- for(auto id_arg : _arguments)
+ for (auto id_arg : _arguments)
{
const auto arg = id_arg.second;
auto tensor = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(id_arg.first));
@@ -248,7 +264,7 @@ void ClKernelRuntime::run_op(ITensorPack &tensors, const Window &window, cl::Com
}
#else // ACL_INTERNAL_TEST_CKW_IN_DF
- for(const auto &arg : _arguments)
+ for (const auto &arg : _arguments)
{
auto tensor = utils::cast::polymorphic_downcast<ICLTensor *>(tensors.get_tensor(arg.id()));
ARM_COMPUTE_ERROR_ON_NULLPTR(tensor);
@@ -259,8 +275,7 @@ void ClKernelRuntime::run_op(ITensorPack &tensors, const Window &window, cl::Com
// Dispatch kernel
enqueue(queue, *this, slice, lws_hint(), use_dummy_work_items);
- }
- while(skip_sliding_window && window.slide_window_slice_3D(slice));
+ } while (skip_sliding_window && window.slide_window_slice_3D(slice));
}
} // namespace dynamic_fusion
diff --git a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h
index 92e73503ce..e78567eb9d 100644
--- a/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h
+++ b/src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h
@@ -68,7 +68,11 @@ private:
* @param[in] arg_slice Window the kernel will be run on
* @param[out] cl_images Extra cl images created from the tensor (will need to be retained until the kernel is enqueued)
*/
- inline void add_tensor_argument(unsigned int &idx, const GpuKernelArgumentInfo &arg, const ICLTensor *tensor, const Window &arg_slice, std::vector<cl::Image2D> &cl_images);
+ inline void add_tensor_argument(unsigned int &idx,
+ const GpuKernelArgumentInfo &arg,
+ const ICLTensor *tensor,
+ const Window &arg_slice,
+ std::vector<cl::Image2D> &cl_images);
#else // ACL_INTERNAL_TEST_CKW_IN_DF
/** Set a kernel argument as part of a tensor
*
@@ -77,7 +81,10 @@ private:
* @param[in] tensor Tensor of which the kernel argument @p arg is a part of
* @param[out] cl_images Extra cl images created from the tensor (will need to be retained until the kernel is enqueued)
*/
- inline void add_kernel_argument(unsigned int &idx, const GpuKernelArgumentBinding &arg, const ICLTensor *tensor, std::vector<cl::Image2D> &cl_images);
+ inline void add_kernel_argument(unsigned int &idx,
+ const GpuKernelArgumentBinding &arg,
+ const ICLTensor *tensor,
+ std::vector<cl::Image2D> &cl_images);
#endif // ACL_INTERNAL_TEST_CKW_IN_DF
private:
diff --git a/src/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.cpp b/src/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.cpp
index cd21b10180..ba39ff4c9d 100644
--- a/src/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.cpp
+++ b/src/dynamic_fusion/runtime/gpu/cl/ClWorkloadRuntime.cpp
@@ -25,6 +25,7 @@
#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/runtime/CL/CLTensor.h"
+
#include "src/dynamic_fusion/runtime/gpu/cl/ClKernelRuntime.h"
#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h"
@@ -55,14 +56,14 @@ public:
{
DataView() = default;
DataView(CLTensor *tensor, const TensorInfo &tensor_info, const AuxMemoryInfo &memory_info)
- : tensor{ tensor }, tensor_info{ tensor_info }, memory_info{ memory_info }
+ : tensor{tensor}, tensor_info{tensor_info}, memory_info{memory_info}
{
}
- ~DataView() = default;
- DataView(const DataView &other) = default;
+ ~DataView() = default;
+ DataView(const DataView &other) = default;
DataView &operator=(const DataView &other) = default;
DataView(DataView &&other) = default;
- DataView &operator=(DataView &&other) = default;
+ DataView &operator=(DataView &&other) = default;
CLTensor *tensor{}; /**< Pointer to the auxiliary tensor */
TensorInfo tensor_info{}; /**< Associated tensor info */
AuxMemoryInfo memory_info{}; /**< Memory requirement */
@@ -92,7 +93,7 @@ private:
{
const auto t_id = tensor_info.id();
auto find_tensor_pair = _owned_tensors.find(t_id);
- if(find_tensor_pair != _owned_tensors.end())
+ if (find_tensor_pair != _owned_tensors.end())
{
return find_tensor_pair->second.get();
}
@@ -107,7 +108,7 @@ private:
}
std::map<ITensorInfo::Id, std::unique_ptr<CLTensor>> _owned_tensors{};
- std::vector<DataView> _tensors{};
+ std::vector<DataView> _tensors{};
};
/** Construct auxiliary tensors required by @ref GpuWorkloadSourceCode
*
@@ -120,12 +121,12 @@ private:
*/
Status create_aux_tensors(ClAuxTensors *aux_tensors, const GpuWorkloadSourceCode &code)
{
- for(auto t_id : code.tensors())
+ for (auto t_id : code.tensors())
{
// Get tensor object
const auto workload_arg = code.query_tensor(t_id);
ICLTensor *tensor_object = nullptr;
- if(workload_arg->memory_descriptor()->memory_type == MemoryType::Auxiliary)
+ if (workload_arg->memory_descriptor()->memory_type == MemoryType::Auxiliary)
{
// Create aux tensor CLTensor object
const TensorInfo tensor_info = *workload_arg->tensor_info();
@@ -133,7 +134,7 @@ Status create_aux_tensors(ClAuxTensors *aux_tensors, const GpuWorkloadSourceCode
const auto aux_memory_info = workload_arg->memory_descriptor()->aux_memory_info;
tensor_object = aux_tensors->add_aux_tensor(tensor_info, aux_memory_info);
- if(tensor_object == nullptr)
+ if (tensor_object == nullptr)
{
return ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Failed to construct an auxiliary tensor");
}
@@ -156,7 +157,7 @@ public:
ITensorPack *find_tensor_pack(UnitWorkloadId uwk_id)
{
auto tensor_pack = _tensor_packs.find(uwk_id);
- if(tensor_pack != _tensor_packs.end())
+ if (tensor_pack != _tensor_packs.end())
{
return &(tensor_pack->second);
}
@@ -173,7 +174,10 @@ public:
return _tensor_packs.at(uwk_id);
}
- friend Status create_tensor_lut(ClTensorLUT *tensor_lut, const GpuWorkloadSourceCode &code, const std::vector<CLTensor *> &user_tensors, const ClAuxTensors &aux_tensors);
+ friend Status create_tensor_lut(ClTensorLUT *tensor_lut,
+ const GpuWorkloadSourceCode &code,
+ const std::vector<CLTensor *> &user_tensors,
+ const ClAuxTensors &aux_tensors);
private:
/** Add a tensor pack and associate it with @ref UnitWorkloadId @p uwk_id
@@ -197,19 +201,22 @@ private:
*
* @return Status
*/
-Status create_tensor_lut(ClTensorLUT *tensor_lut, const GpuWorkloadSourceCode &code, const std::vector<CLTensor *> &user_tensors, const ClAuxTensors &aux_tensors)
+Status create_tensor_lut(ClTensorLUT *tensor_lut,
+ const GpuWorkloadSourceCode &code,
+ const std::vector<CLTensor *> &user_tensors,
+ const ClAuxTensors &aux_tensors)
{
// Combine user tensors and aux tensors
std::map<ITensorInfo::Id, CLTensor *> tensor_map;
- for(auto tensor : user_tensors)
+ for (auto tensor : user_tensors)
{
const auto t_id = tensor->info()->id();
- if(tensor_map.find(t_id) != tensor_map.end())
+ if (tensor_map.find(t_id) != tensor_map.end())
{
// In case of elementwise in-place: give another Id to the In/Out tensor when passed again
std::vector<ITensorInfo::Id> ids;
- for(auto &t : tensor_map)
+ for (auto &t : tensor_map)
{
ids.push_back(t.first);
}
@@ -221,11 +228,11 @@ Status create_tensor_lut(ClTensorLUT *tensor_lut, const GpuWorkloadSourceCode &c
tensor_map[t_id] = tensor;
}
}
- for(const auto &data : aux_tensors.get_tensors())
+ for (const auto &data : aux_tensors.get_tensors())
{
const auto t_id = data.tensor_info.id();
const auto tensor = data.tensor;
- if(tensor_map.find(t_id) != tensor_map.end())
+ if (tensor_map.find(t_id) != tensor_map.end())
{
return ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Clashing tensor ids");
}
@@ -233,25 +240,25 @@ Status create_tensor_lut(ClTensorLUT *tensor_lut, const GpuWorkloadSourceCode &c
}
// Add tensor objects into corresponding tensor packs
- for(auto id_tensor : tensor_map)
+ for (auto id_tensor : tensor_map)
{
const auto t_id = id_tensor.first;
const auto tensor_object = id_tensor.second;
- if(tensor_object == nullptr)
+ if (tensor_object == nullptr)
{
return ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "Trying to add a nullptr into the tensor packs");
}
- if(tensor_object->allocator()->info().total_size() == 0U)
+ if (tensor_object->allocator()->info().total_size() == 0U)
{
return ARM_COMPUTE_CREATE_ERROR(ErrorCode::RUNTIME_ERROR, "No allocated memory found in tensor");
}
- for(auto uwk_id : code.get_unit_workloads_from_tensor(t_id))
+ for (auto uwk_id : code.get_unit_workloads_from_tensor(t_id))
{
ITensorPack *tensor_pack = tensor_lut->find_tensor_pack(uwk_id);
- if(tensor_pack == nullptr)
+ if (tensor_pack == nullptr)
{
- tensor_lut->add_tensor_pack(uwk_id, ITensorPack{ { t_id, tensor_object } });
+ tensor_lut->add_tensor_pack(uwk_id, ITensorPack{{t_id, tensor_object}});
}
else
{
@@ -269,15 +276,14 @@ struct ClWorkloadRuntime::Implementation
{
std::map<UnitWorkloadId, std::unique_ptr<ClKernelRuntime>> _kernels{};
std::map<UnitWorkloadId, std::unique_ptr<ClKernelRuntime>> _kernels_prep{};
- bool _is_configured{ false };
- bool _is_prepared{ false };
- ClTensorLUT _tensor_lut{};
- ClAuxTensors _aux_tensors{};
- GpuWorkloadSourceCode _source_code{};
+ bool _is_configured{false};
+ bool _is_prepared{false};
+ ClTensorLUT _tensor_lut{};
+ ClAuxTensors _aux_tensors{};
+ GpuWorkloadSourceCode _source_code{};
};
-ClWorkloadRuntime::ClWorkloadRuntime()
- : _impl{ std::make_unique<Implementation>() }
+ClWorkloadRuntime::ClWorkloadRuntime() : _impl{std::make_unique<Implementation>()}
{
}
@@ -286,18 +292,19 @@ ClWorkloadRuntime::~ClWorkloadRuntime() = default;
Status ClWorkloadRuntime::configure(const GpuWorkloadSketch &sketch)
{
ARM_COMPUTE_RETURN_ERROR_ON_MSG(_impl->_is_configured, "ClWorkloadRuntime cannot be re-configured");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(sketch.gpu_context()->gpu_language() != GpuLanguage::OpenCL, "ClWorkloadRuntime cannot be configured with non-OpenCL workload sketch");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(sketch.gpu_context()->gpu_language() != GpuLanguage::OpenCL,
+ "ClWorkloadRuntime cannot be configured with non-OpenCL workload sketch");
// Generate source code
_impl->_source_code = sketch.implementation().generate_source_code();
// Configure unit workload from source code
- for(auto uwk_id : _impl->_source_code.unit_workloads())
+ for (auto uwk_id : _impl->_source_code.unit_workloads())
{
const auto work = _impl->_source_code.query_unit_workload(uwk_id);
const auto stage = work.stage().stage;
auto k = std::make_unique<ClKernelRuntime>();
k->configure(*sketch.gpu_context()->cl_compile_context(), work.code());
- switch(stage)
+ switch (stage)
{
case UnitWorkloadStage::Stage::Run:
{
@@ -323,9 +330,9 @@ Status ClWorkloadRuntime::configure(const GpuWorkloadSketch &sketch)
void ClWorkloadRuntime::prepare()
{
- if(!_impl->_is_prepared)
+ if (!_impl->_is_prepared)
{
- for(auto &id_kernel_pair : _impl->_kernels_prep)
+ for (auto &id_kernel_pair : _impl->_kernels_prep)
{
const bool flush_queue = false;
const auto uwk_id = id_kernel_pair.first;
@@ -344,7 +351,7 @@ Status ClWorkloadRuntime::run(const std::vector<CLTensor *> &tensors)
const auto st = create_tensor_lut(&_impl->_tensor_lut, _impl->_source_code, tensors, _impl->_aux_tensors);
ARM_COMPUTE_RETURN_ON_ERROR(st);
prepare();
- for(auto &id_kernel_pair : _impl->_kernels)
+ for (auto &id_kernel_pair : _impl->_kernels)
{
// Flush the command queue on the last kernel
const bool flush_queue = false;
@@ -358,7 +365,7 @@ Status ClWorkloadRuntime::run(const std::vector<CLTensor *> &tensors)
std::vector<std::tuple<CLTensor *, TensorInfo, AuxMemoryInfo>> ClWorkloadRuntime::get_auxiliary_tensors()
{
std::vector<std::tuple<CLTensor *, TensorInfo, AuxMemoryInfo>> aux_tensors;
- for(const auto &data : _impl->_aux_tensors.get_tensors())
+ for (const auto &data : _impl->_aux_tensors.get_tensors())
{
aux_tensors.emplace_back(data.tensor, data.tensor_info, data.memory_info);
}
diff --git a/src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.cpp b/src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.cpp
index 84fb279237..7044b0ea66 100644
--- a/src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.cpp
+++ b/src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.cpp
@@ -30,14 +30,17 @@ namespace experimental
{
namespace dynamic_fusion
{
-void cl_add_tensor_component_argument(cl::Kernel &kernel, unsigned int &idx, const ICLTensor *tensor, TensorComponentType component)
+void cl_add_tensor_component_argument(cl::Kernel &kernel,
+ unsigned int &idx,
+ const ICLTensor *tensor,
+ TensorComponentType component)
{
ARM_COMPUTE_ERROR_ON(tensor == nullptr);
const auto *info = tensor->info();
const auto &strides = info->strides_in_bytes();
- switch(component)
+ switch (component)
{
case TensorComponentType::OffsetFirstElement:
kernel.setArg<cl_uint>(idx++, info->offset_first_element_in_bytes());
diff --git a/src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.h b/src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.h
index 4cbb157a48..306d547acb 100644
--- a/src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.h
+++ b/src/dynamic_fusion/runtime/gpu/cl/ckw_driver/GpuCkwKernelArgumentsHelpers.h
@@ -42,7 +42,10 @@ namespace dynamic_fusion
* @param[in] tensor Tensor from which to access the tensor component.
* @param[in] component Tensor component to select such as tensor dimensions, strides, etc.
*/
-void cl_add_tensor_component_argument(cl::Kernel &kernel, unsigned int &idx, const ICLTensor *tensor, TensorComponentType component);
+void cl_add_tensor_component_argument(cl::Kernel &kernel,
+ unsigned int &idx,
+ const ICLTensor *tensor,
+ TensorComponentType component);
/** Add an OpenCL buffer object to the kernel's arguments at the specified index @p idx.
*
diff --git a/src/dynamic_fusion/sketch/ArgumentPack.h b/src/dynamic_fusion/sketch/ArgumentPack.h
index f118d7d851..3bf380b1ec 100644
--- a/src/dynamic_fusion/sketch/ArgumentPack.h
+++ b/src/dynamic_fusion/sketch/ArgumentPack.h
@@ -25,6 +25,7 @@
#define SRC_DYNAMIC_FUSION_SKETCH_ARGUMENTPACK
#include "arm_compute/core/experimental/Types.h"
+
#include <unordered_map>
#include <vector>
@@ -52,26 +53,21 @@ public:
*/
struct PackElement
{
- PackElement() = default;
- PackElement(const PackElement &elem) = default;
+ PackElement() = default;
+ PackElement(const PackElement &elem) = default;
PackElement &operator=(const PackElement &elem) = default;
PackElement(PackElement &&elem) = default;
- PackElement &operator=(PackElement &&elem) = default;
- PackElement(Id id, T *tensor)
- : id(id), tensor(tensor), ctensor(nullptr)
+ PackElement &operator=(PackElement &&elem) = default;
+ PackElement(Id id, T *tensor) : id(id), tensor(tensor), ctensor(nullptr)
{
}
- PackElement(Id id, const T *ctensor)
- : id(id), tensor(nullptr), ctensor(ctensor)
+ PackElement(Id id, const T *ctensor) : id(id), tensor(nullptr), ctensor(ctensor)
{
}
- Id id{ ACL_UNKNOWN }; /**< Argument id within the pack */
- T *tensor{ nullptr }; /**< Non-const pointer to tensor-related object */
- const T *ctensor
- {
- nullptr
- }; /**< Const pointer to tensor-related object */
+ Id id{ACL_UNKNOWN}; /**< Argument id within the pack */
+ T *tensor{nullptr}; /**< Non-const pointer to tensor-related object */
+ const T *ctensor{nullptr}; /**< Const pointer to tensor-related object */
};
public:
@@ -88,10 +84,9 @@ public:
/** Allow instances of this class to be moved */
ArgumentPack<T> &operator=(ArgumentPack<T> &&other) = default;
/** Initializer list Constructor */
- ArgumentPack(const std::initializer_list<PackElement> &l)
- : _pack{}
+ ArgumentPack(const std::initializer_list<PackElement> &l) : _pack{}
{
- for(const auto &e : l)
+ for (const auto &e : l)
{
_pack[e.id] = e;
}
@@ -134,7 +129,7 @@ public:
const T *get_const_tensor(Id id) const
{
auto it = _pack.find(id);
- if(it != _pack.end())
+ if (it != _pack.end())
{
return it->second.ctensor != nullptr ? it->second.ctensor : it->second.tensor;
}
@@ -171,10 +166,10 @@ public:
std::vector<T *> get_src_tensors()
{
std::vector<T *> src_tensors{};
- for(int id = static_cast<int>(TensorType::ACL_SRC); id <= static_cast<int>(TensorType::ACL_SRC_END); ++id)
+ for (int id = static_cast<int>(TensorType::ACL_SRC); id <= static_cast<int>(TensorType::ACL_SRC_END); ++id)
{
auto tensor = get_tensor(static_cast<TensorType>(id));
- if(tensor != nullptr)
+ if (tensor != nullptr)
{
src_tensors.push_back(tensor);
}
@@ -188,10 +183,10 @@ public:
std::vector<const T *> get_const_src_tensors() const
{
std::vector<const T *> src_tensors{};
- for(int id = static_cast<int>(TensorType::ACL_SRC); id <= static_cast<int>(TensorType::ACL_SRC_END); ++id)
+ for (int id = static_cast<int>(TensorType::ACL_SRC); id <= static_cast<int>(TensorType::ACL_SRC_END); ++id)
{
auto tensor = get_const_tensor(static_cast<TensorType>(id));
- if(tensor != nullptr)
+ if (tensor != nullptr)
{
src_tensors.push_back(tensor);
}
@@ -205,10 +200,10 @@ public:
std::vector<T *> get_dst_tensors()
{
std::vector<T *> dst_tensors{};
- for(int id = static_cast<int>(TensorType::ACL_DST); id <= static_cast<int>(TensorType::ACL_DST_END); ++id)
+ for (int id = static_cast<int>(TensorType::ACL_DST); id <= static_cast<int>(TensorType::ACL_DST_END); ++id)
{
auto tensor = get_tensor(static_cast<TensorType>(id));
- if(tensor != nullptr)
+ if (tensor != nullptr)
{
dst_tensors.push_back(tensor);
}
@@ -222,10 +217,10 @@ public:
std::vector<const T *> get_const_dst_tensors() const
{
std::vector<const T *> dst_tensors{};
- for(int id = static_cast<int>(TensorType::ACL_DST); id <= static_cast<int>(TensorType::ACL_DST_END); ++id)
+ for (int id = static_cast<int>(TensorType::ACL_DST); id <= static_cast<int>(TensorType::ACL_DST_END); ++id)
{
auto tensor = get_const_tensor(static_cast<TensorType>(id));
- if(tensor != nullptr)
+ if (tensor != nullptr)
{
dst_tensors.push_back(tensor);
}
diff --git a/src/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.cpp b/src/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.cpp
index 3a5657e07b..6f3816568c 100644
--- a/src/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.cpp
+++ b/src/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.cpp
@@ -69,7 +69,8 @@ uint32_t DepthwiseConv2dAttributes::depth_multiplier() const
return _depth_multiplier;
}
-DepthwiseConv2dAttributes &DepthwiseConv2dAttributes::dimension_rounding_type(const DimensionRoundingType &dimension_rounding_type)
+DepthwiseConv2dAttributes &
+DepthwiseConv2dAttributes::dimension_rounding_type(const DimensionRoundingType &dimension_rounding_type)
{
_dimension_rounding_type = dimension_rounding_type;
return *this;
diff --git a/src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp b/src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp
index c28791f5fe..80f65f926a 100644
--- a/src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp
+++ b/src/dynamic_fusion/sketch/attributes/Pool2dAttributes.cpp
@@ -23,6 +23,7 @@
*/
#include "arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h"
+
#include "arm_compute/core/Size2D.h"
namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h b/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h
index 226e1a2df3..03817173f4 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h
@@ -61,11 +61,10 @@ struct GpuKernelArgumentInfo
/** Default constructor */
GpuKernelArgumentInfo() = default;
/** Constructor */
- GpuKernelArgumentInfo(Type type)
- : type{ type }
+ GpuKernelArgumentInfo(Type type) : type{type}
{
}
- Type type{ Type::Tensor_4D_t_Buffer };
+ Type type{Type::Tensor_4D_t_Buffer};
};
bool operator==(const GpuKernelArgumentInfo &info0, const GpuKernelArgumentInfo &info1);
/** Kernel argument information linked with its corresponding @ref ITensorInfo
@@ -79,10 +78,8 @@ public:
* @param[in] tensor_info Associated @ref ITensorInfo
* @param[in] kernel_arg_info Associated @ref GpuKernelArgumentInfo
*/
- GpuKernelArgument(const ITensorInfo &tensor_info,
- const GpuKernelArgumentInfo &kernel_arg_info)
- : _tensor_info{ tensor_info },
- _kernel_arg_info{ kernel_arg_info }
+ GpuKernelArgument(const ITensorInfo &tensor_info, const GpuKernelArgumentInfo &kernel_arg_info)
+ : _tensor_info{tensor_info}, _kernel_arg_info{kernel_arg_info}
{
}
/** Get workload tensor id */
@@ -200,12 +197,12 @@ public:
TensorComponent /** @ref TensorComponentType */
};
GpuKernelArgumentBinding(ITensorInfo::Id id, TensorStorageType storage)
- : _type{ Type::TensorStorage }, _id{ id }, _value{}
+ : _type{Type::TensorStorage}, _id{id}, _value{}
{
_value.tensor_storage_type = storage;
}
GpuKernelArgumentBinding(ITensorInfo::Id id, TensorComponentType component)
- : _type{ Type::TensorComponent }, _id{ id }, _value{}
+ : _type{Type::TensorComponent}, _id{id}, _value{}
{
_value.tensor_component_type = component;
}
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp
index 5a65ede38b..1a458c9862 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.cpp
@@ -31,35 +31,31 @@ namespace experimental
{
namespace dynamic_fusion
{
-std::vector<DependencyGraph::TensorId> GpuKernelComponentGraph::get_tensor_ids(const std::vector<const ITensorInfo *> tensors)
+std::vector<DependencyGraph::TensorId>
+GpuKernelComponentGraph::get_tensor_ids(const std::vector<const ITensorInfo *> tensors)
{
std::vector<DependencyGraph::TensorId> tensor_ids{};
- std::transform(
- std::begin(tensors), std::end(tensors),
- std::back_inserter(tensor_ids),
- [](const auto & t)
- {
- return t->id();
- });
+ std::transform(std::begin(tensors), std::end(tensors), std::back_inserter(tensor_ids),
+ [](const auto &t) { return t->id(); });
return tensor_ids;
}
GpuKernelComponentGraph::GpuKernelComponentGraph(GpuWorkloadContext *context, GpuComponentServices *services)
- : _context{ context }, _services{ services }, _components{}, _tensors{}, _dependency_graph{}
+ : _context{context}, _services{services}, _components{}, _tensors{}, _dependency_graph{}
{
}
GpuKernelComponentStream GpuKernelComponentGraph::fuse(const MemoryDescriptorMap &mem_map) const
{
- GpuKernelComponentStream stream{ _context, _services, mem_map };
+ GpuKernelComponentStream stream{_context, _services, mem_map};
const auto op_seq = _dependency_graph.build_operators_sequence();
stream.new_component_group();
- for(auto op : op_seq)
+ for (auto op : op_seq)
{
const auto component = _components.at(op.op).get();
const auto success = stream.add_component(component);
- if(!success) // Assume first failure was because the root component is unfusable
+ if (!success) // Assume first failure was because the root component is unfusable
{
stream.new_component_group();
const auto success = stream.add_component(component);
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h
index 85c9b45840..6f871a3c90 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h
@@ -70,21 +70,21 @@ public:
* @param[in] args Component arguments except for component id, which is auto-allocated
*/
template <typename T, typename... Args>
- void add_new_component(Args &&... args)
+ void add_new_component(Args &&...args)
{
- auto comp = _services->component_factory().create<T>(std::forward<Args>(args)...);
- ArgumentPack<ITensorInfo> tensors = comp->tensors();
+ auto comp = _services->component_factory().create<T>(std::forward<Args>(args)...);
+ ArgumentPack<ITensorInfo> tensors = comp->tensors();
const auto src_tensor_ids = get_tensor_ids(tensors.get_const_src_tensors());
const auto dst_tensor_ids = get_tensor_ids(tensors.get_const_dst_tensors());
- bool success = _dependency_graph.add_operator(comp->id(), src_tensor_ids, dst_tensor_ids);
+ bool success = _dependency_graph.add_operator(comp->id(), src_tensor_ids, dst_tensor_ids);
ARM_COMPUTE_UNUSED(success);
ARM_COMPUTE_ERROR_ON(!success);
_components[comp->id()] = std::move(comp);
- for(auto t : tensors.get_const_src_tensors())
+ for (auto t : tensors.get_const_src_tensors())
{
_tensors[t->id()] = t;
}
- for(auto t : tensors.get_const_dst_tensors())
+ for (auto t : tensors.get_const_dst_tensors())
{
_tensors[t->id()] = t;
}
@@ -99,11 +99,11 @@ public:
private:
static std::vector<DependencyGraph::TensorId> get_tensor_ids(const std::vector<const ITensorInfo *> tensors);
- GpuWorkloadContext *_context;
- GpuComponentServices *_services;
+ GpuWorkloadContext *_context;
+ GpuComponentServices *_services;
std::map<ComponentId, std::unique_ptr<IGpuKernelComponent>> _components;
std::map<ITensorInfo::Id, const ITensorInfo *> _tensors;
- DependencyGraph _dependency_graph{};
+ DependencyGraph _dependency_graph{};
};
} // namespace dynamic_fusion
} // namespace experimental
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp
index 81c3f0c800..5a6d125d96 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.cpp
@@ -25,6 +25,7 @@
#include "arm_compute/core/ITensorInfo.h"
#include "arm_compute/core/Validate.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
#include <algorithm>
@@ -37,86 +38,87 @@ namespace dynamic_fusion
{
bool GpuKernelComponentGroup::add_component(ComponentPtr component)
{
- ARM_COMPUTE_ERROR_ON_MSG(
- _finalized, "The component group has been finalized and cannot be altered.");
+ ARM_COMPUTE_ERROR_ON_MSG(_finalized, "The component group has been finalized and cannot be altered.");
// note: Constraint 1 is guaranteed as a precondition
// Constraint 2
- if(component->type() != GpuComponentType::Output && _components.size() >= max_fused_components)
+ if (component->type() != GpuComponentType::Output && _components.size() >= max_fused_components)
{
return false;
}
// Constraint 3.1: Pattern: (Unfusable + Output)
- if(!_components.empty() && get_root_component()->type() == GpuComponentType::Unfusable && component->type() != GpuComponentType::Output)
+ if (!_components.empty() && get_root_component()->type() == GpuComponentType::Unfusable &&
+ component->type() != GpuComponentType::Output)
{
return false;
}
// Constraint 3.2
- if(!_components.empty() && (component->type() != GpuComponentType::Simple && component->type() != GpuComponentType::Output))
+ if (!_components.empty() &&
+ (component->type() != GpuComponentType::Simple && component->type() != GpuComponentType::Output))
{
return false;
}
// Constraint 4
- if(component->type() != GpuComponentType::Unfusable && component->tensors().get_const_dst_tensors().size() != 1U)
+ if (component->type() != GpuComponentType::Unfusable && component->tensors().get_const_dst_tensors().size() != 1U)
{
return false;
}
// Constraint 5
- if(!_components.empty() && !(get_root_component()->properties() == component->properties()))
+ if (!_components.empty() && !(get_root_component()->properties() == component->properties()))
{
return false;
}
// Constraint 7
- if(!_components.empty())
+ if (!_components.empty())
{
const auto root_dst_tensors = get_root_component()->tensors().get_const_dst_tensors();
ARM_COMPUTE_ERROR_ON(root_dst_tensors.empty());
const auto first_dst_tensor = root_dst_tensors[0];
const auto dst_tensors = component->tensors().get_const_dst_tensors();
- for(const auto &t : root_dst_tensors)
+ for (const auto &t : root_dst_tensors)
{
- if(detail::have_different_dimensions(t->tensor_shape(), first_dst_tensor->tensor_shape(), 0))
+ if (detail::have_different_dimensions(t->tensor_shape(), first_dst_tensor->tensor_shape(), 0))
{
return false;
}
}
- for(const auto &t : dst_tensors)
+ for (const auto &t : dst_tensors)
{
- if(detail::have_different_dimensions(t->tensor_shape(), first_dst_tensor->tensor_shape(), 0))
+ if (detail::have_different_dimensions(t->tensor_shape(), first_dst_tensor->tensor_shape(), 0))
{
return false;
}
}
}
// Constraint 8
- if(!_components.empty())
+ if (!_components.empty())
{
const auto root_dst_tensors = get_root_component()->tensors().get_const_dst_tensors();
ARM_COMPUTE_ERROR_ON(root_dst_tensors.empty());
const auto first_dst_tensor_layout = root_dst_tensors[0]->data_layout();
const auto dst_tensors = component->tensors().get_const_dst_tensors();
- for(const auto &t : root_dst_tensors)
+ for (const auto &t : root_dst_tensors)
{
- if(t->data_layout() != first_dst_tensor_layout)
+ if (t->data_layout() != first_dst_tensor_layout)
{
return false;
}
}
- for(const auto &t : dst_tensors)
+ for (const auto &t : dst_tensors)
{
- if(t->data_layout() != first_dst_tensor_layout)
+ if (t->data_layout() != first_dst_tensor_layout)
{
return false;
}
}
}
// Constraint 9
- if(component->tensors().get_const_dst_tensors().size() >= max_dst_tensors)
+ if (component->tensors().get_const_dst_tensors().size() >= max_dst_tensors)
{
return false;
}
// Constraint 9 corollary
- if(component->type() == GpuComponentType::Output && _components.size() >= max_fused_components + max_dst_tensors)
+ if (component->type() == GpuComponentType::Output && _components.size() >= max_fused_components + max_dst_tensors)
{
return false;
}
@@ -126,36 +128,36 @@ bool GpuKernelComponentGroup::add_component(ComponentPtr component)
void GpuKernelComponentGroup::finalize()
{
- if(_finalized)
+ if (_finalized)
{
return;
}
_finalized = true;
- std::set<const ITensorInfo *> output_tensors;
+ std::set<const ITensorInfo *> output_tensors;
std::map<const ITensorInfo *, std::vector<const ITensorInfo *>> possible_tile_map;
- std::map<const ITensorInfo *, int32_t> tile_usages;
+ std::map<const ITensorInfo *, int32_t> tile_usages;
- for(auto component : _components)
+ for (auto component : _components)
{
- const auto tensors = component->tensors();
+ const auto tensors = component->tensors();
const auto src_tensors = tensors.get_const_src_tensors();
const auto dst_tensors = tensors.get_const_dst_tensors();
// Detect input, output and intermediate tensors.
- for(auto tensor : src_tensors)
+ for (auto tensor : src_tensors)
{
const auto output_tensors_it = output_tensors.find(tensor);
- if(output_tensors_it != output_tensors.end())
+ if (output_tensors_it != output_tensors.end())
{
// This tensor is the output of another operator.
// It must be marked as intermediate tensor.
output_tensors.erase(output_tensors_it);
_interm_tensors.insert(tensor);
}
- else if(_interm_tensors.find(tensor) == _interm_tensors.end())
+ else if (_interm_tensors.find(tensor) == _interm_tensors.end())
{
_input_tensors.insert(tensor);
@@ -164,7 +166,7 @@ void GpuKernelComponentGroup::finalize()
}
}
- for(auto tensor : dst_tensors)
+ for (auto tensor : dst_tensors)
{
ARM_COMPUTE_ERROR_ON(_input_tensors.find(tensor) != _input_tensors.end());
ARM_COMPUTE_ERROR_ON(output_tensors.find(tensor) != output_tensors.end());
@@ -177,27 +179,27 @@ void GpuKernelComponentGroup::finalize()
// Check if the output can overwrite the input tile.
const auto component_type = component->type();
- if(component_type == GpuComponentType::Simple || component_type == GpuComponentType::Output)
+ if (component_type == GpuComponentType::Simple || component_type == GpuComponentType::Output)
{
ARM_COMPUTE_ERROR_ON(dst_tensors.size() != 1);
- const auto dst_tensor = dst_tensors[0];
- const auto &dst_shape = dst_tensor->tensor_shape();
- const auto &dst_type = dst_tensor->data_type();
+ const auto dst_tensor = dst_tensors[0];
+ const auto &dst_shape = dst_tensor->tensor_shape();
+ const auto &dst_type = dst_tensor->data_type();
tile_usages[dst_tensor] = 0;
- for(auto src_tensor : src_tensors)
+ for (auto src_tensor : src_tensors)
{
const auto &src_shape = src_tensor->tensor_shape();
- const auto &src_type = src_tensor->data_type();
+ const auto &src_type = src_tensor->data_type();
- if(src_shape == dst_shape && src_type == dst_type)
+ if (src_shape == dst_shape && src_type == dst_type)
{
const auto tile_usages_it = tile_usages.find(src_tensor);
ARM_COMPUTE_ERROR_ON(tile_usages_it == tile_usages.end());
- if(component_type == GpuComponentType::Simple || tile_usages_it->second > 0)
+ if (component_type == GpuComponentType::Simple || tile_usages_it->second > 0)
{
// Increase the number of tile usages unless this component is an output
// and the tile has not been shared with any component.
@@ -212,7 +214,7 @@ void GpuKernelComponentGroup::finalize()
else
{
// Outputs of complex and unfusable components need dedicated tile.
- for(auto tensor : dst_tensors)
+ for (auto tensor : dst_tensors)
{
tile_usages[tensor] = 0;
}
@@ -220,25 +222,25 @@ void GpuKernelComponentGroup::finalize()
}
// Find the smallest list of tiles that the intermediate tensors need to write to.
- for(auto tensor : _input_tensors)
+ for (auto tensor : _input_tensors)
{
_tile_map[tensor] = tensor;
}
- for(auto component : _components)
+ for (auto component : _components)
{
const auto dst_tensors = component->tensors().get_const_dst_tensors();
- for(auto tensor : dst_tensors)
+ for (auto tensor : dst_tensors)
{
const auto target_tiles = possible_tile_map.at(tensor);
- _tile_map[tensor] = tensor;
+ _tile_map[tensor] = tensor;
- for(auto target : target_tiles)
+ for (auto target : target_tiles)
{
const auto num_usage = tile_usages[target];
- if(num_usage <= 1)
+ if (num_usage <= 1)
{
// The target tile is consumed by only this operator, so we can reuse it
// for the destination tensor data.
@@ -249,26 +251,23 @@ void GpuKernelComponentGroup::finalize()
}
}
- for(auto tensor : output_tensors)
+ for (auto tensor : output_tensors)
{
_tile_map[tensor] = tensor;
}
// All intermediate tensors that cannot be shared with any previous tensor
// will need to be declared as tile variable.
- for(auto tensor_tile : _tile_map)
+ for (auto tensor_tile : _tile_map)
{
- if(tensor_tile.first == tensor_tile.second &&
- _interm_tensors.find(tensor_tile.first) != _interm_tensors.end())
+ if (tensor_tile.first == tensor_tile.second && _interm_tensors.find(tensor_tile.first) != _interm_tensors.end())
{
_tiles.push_back(tensor_tile.first);
}
}
- std::set_union(
- _input_tensors.begin(), _input_tensors.end(),
- output_tensors.begin(), output_tensors.end(),
- std::back_inserter(_argument_tensors));
+ std::set_union(_input_tensors.begin(), _input_tensors.end(), output_tensors.begin(), output_tensors.end(),
+ std::back_inserter(_argument_tensors));
_any_output_tensor = *output_tensors.begin();
}
@@ -282,7 +281,7 @@ const ITensorInfo *GpuKernelComponentGroup::get_tile_for_tensor(const ITensorInf
{
ARM_COMPUTE_ERROR_ON_MSG(!_finalized, "The component group must have been finalized.");
- if(_tile_map.find(tensor) != _tile_map.end())
+ if (_tile_map.find(tensor) != _tile_map.end())
{
return _tile_map.at(tensor);
}
@@ -304,7 +303,7 @@ std::vector<const ITensorInfo *> GpuKernelComponentGroup::get_argument_tensors()
GpuKernelComponentGroup::ComponentPtr GpuKernelComponentGroup::get_root_component() const
{
- if(empty())
+ if (empty())
{
return nullptr;
}
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h
index c939aec369..6ad71abb39 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h
@@ -25,12 +25,11 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTGROUP
#include "components/Types.h"
-
#include <cstdint>
#include <cstdlib>
-#include <vector>
-#include <set>
#include <map>
+#include <set>
+#include <vector>
namespace arm_compute
{
@@ -129,9 +128,9 @@ public:
/** Get the number of components within the group */
size_t size() const;
/** Check if the component group is empty */
- bool empty() const;
- ComponentPtr &operator[](size_t index);
- const ComponentPtr &operator[](size_t index) const;
+ bool empty() const;
+ ComponentPtr &operator[](size_t index);
+ const ComponentPtr &operator[](size_t index) const;
typename std::vector<ComponentPtr>::iterator begin();
typename std::vector<ComponentPtr>::iterator end();
typename std::vector<ComponentPtr>::const_iterator begin() const;
@@ -142,13 +141,13 @@ public:
private:
std::vector<ComponentPtr> _components{};
- bool _finalized{ false };
+ bool _finalized{false};
- std::vector<const ITensorInfo *> _argument_tensors{};
- std::set<const ITensorInfo *> _input_tensors{};
- std::set<const ITensorInfo *> _interm_tensors{};
- const ITensorInfo *_any_output_tensor{ nullptr };
- std::vector<const ITensorInfo *> _tiles{};
+ std::vector<const ITensorInfo *> _argument_tensors{};
+ std::set<const ITensorInfo *> _input_tensors{};
+ std::set<const ITensorInfo *> _interm_tensors{};
+ const ITensorInfo *_any_output_tensor{nullptr};
+ std::vector<const ITensorInfo *> _tiles{};
std::map<const ITensorInfo *, const ITensorInfo *> _tile_map{};
};
} // namespace dynamic_fusion
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp
index a2b6623370..8042e3dd08 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.cpp
@@ -23,9 +23,9 @@
*/
#include "GpuKernelComponentStream.h"
+#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
#include "src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.h"
#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h"
-#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
namespace arm_compute
{
@@ -33,8 +33,10 @@ namespace experimental
{
namespace dynamic_fusion
{
-GpuKernelComponentStream::GpuKernelComponentStream(GpuWorkloadContext *context, GpuComponentServices *services, const MemoryDescriptorMap &mem_map)
- : _context{ context }, _services{ services }, _component_groups{}, _mem_map{ mem_map }
+GpuKernelComponentStream::GpuKernelComponentStream(GpuWorkloadContext *context,
+ GpuComponentServices *services,
+ const MemoryDescriptorMap &mem_map)
+ : _context{context}, _services{services}, _component_groups{}, _mem_map{mem_map}
{
}
@@ -42,7 +44,7 @@ GpuWorkloadSourceCode GpuKernelComponentStream::write_workload_code()
{
GpuWorkloadSourceCode source_code;
// Traverse through component groups and assemble workload together
- for(auto && group : _component_groups)
+ for (auto &&group : _component_groups)
{
group.finalize();
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h
index ba2503a938..ef8a8a15b0 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelComponentStream.h
@@ -25,6 +25,7 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUKERNELCOMPONENTSTREAM
#include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h"
+
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h"
@@ -53,7 +54,9 @@ public:
* @param[in] services @ref GpuComponentServices to be used throughout the stream
* @param[in] mem_map @ref MemoryDescriptor map used to assemble the @ref GpuWorkloadSourceCode
*/
- GpuKernelComponentStream(GpuWorkloadContext *context, GpuComponentServices *services, const MemoryDescriptorMap &mem_map);
+ GpuKernelComponentStream(GpuWorkloadContext *context,
+ GpuComponentServices *services,
+ const MemoryDescriptorMap &mem_map);
/** Allow instances of this class to be copy constructed */
GpuKernelComponentStream(const GpuKernelComponentStream &stream) = default;
/** Allow instances of this class to be copied */
diff --git a/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h b/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h
index 64e1cdc3bc..24812cd8a7 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h
@@ -26,6 +26,7 @@
#include "arm_compute/core/CL/CLCompileContext.h"
#include "arm_compute/core/Window.h"
+
#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
diff --git a/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp
index c99984fc0e..502ceab807 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuLogicalKernel.cpp
@@ -26,9 +26,9 @@
#include "arm_compute/core/experimental/Types.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuComponentServices.h"
-#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h"
+#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuComponentServices.h"
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.h"
#else // ACL_INTERNAL_TEST_CKW_IN_DF
@@ -42,7 +42,7 @@ namespace experimental
namespace dynamic_fusion
{
GpuLogicalKernel::GpuLogicalKernel(GpuComponentServices *services, const GpuKernelComponentGroup &components)
- : _comp_group{ components }, _store_components{}
+ : _comp_group{components}, _store_components{}
{
ARM_COMPUTE_UNUSED(services);
}
@@ -51,9 +51,9 @@ GpuKernelSourceCode GpuLogicalKernel::write_kernel_code()
{
GpuKernelSourceCode code;
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- ClTemplateWriter writer { _comp_group };
+ ClTemplateWriter writer{_comp_group};
#else // ACL_INTERNAL_TEST_CKW_IN_DF
- GpuCkwDriver writer { _comp_group };
+ GpuCkwDriver writer{_comp_group};
#endif // ACL_INTERNAL_TEST_CKW_IN_DF
code.name(writer.get_name());
diff --git a/src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.cpp b/src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.cpp
index 7bb14c8698..aec8b9db4f 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.cpp
@@ -36,20 +36,15 @@ namespace
std::vector<DependencyGraph::TensorId> get_tensor_ids(const std::vector<const ITensorInfo *> tensors)
{
std::vector<DependencyGraph::TensorId> tensor_ids{};
- std::transform(
- std::begin(tensors), std::end(tensors),
- std::back_inserter(tensor_ids),
- [](const auto & t)
- {
- return t->id();
- });
+ std::transform(std::begin(tensors), std::end(tensors), std::back_inserter(tensor_ids),
+ [](const auto &t) { return t->id(); });
return tensor_ids;
}
} // namespace
Operator::Operator(OperatorId id, GpuOperatorType operator_type, const ArgumentPack<ITensorInfo> &tensors)
- : _id{ id }, _operator_type{ operator_type }, _tensors{ tensors }
+ : _id{id}, _operator_type{operator_type}, _tensors{tensors}
{
}
@@ -73,69 +68,69 @@ bool GpuOperatorGroup::try_add_operator(const Operator &op, bool is_output) cons
const auto src_tensor_ids = get_tensor_ids(op.tensors().get_const_src_tensors());
const auto dst_tensor_ids = get_tensor_ids(op.tensors().get_const_dst_tensors());
// Constraint 1
- if(!_graph.try_add_operator_as_linear(op.id(), src_tensor_ids, dst_tensor_ids, is_output))
+ if (!_graph.try_add_operator_as_linear(op.id(), src_tensor_ids, dst_tensor_ids, is_output))
{
return false;
}
// Constraint 2
- if(_operators.size() >= max_fused_operators)
+ if (_operators.size() >= max_fused_operators)
{
return false;
}
// Constraint 3.1: Pattern: (Unfusable)
- if(_operators.size() > 0 && get_root_operator()->operator_type() == GpuOperatorType::Unfusable)
+ if (_operators.size() > 0 && get_root_operator()->operator_type() == GpuOperatorType::Unfusable)
{
return false;
}
// Constraint 3.2
- if(_operators.size() > 0 && (op.operator_type() != GpuOperatorType::Simple))
+ if (_operators.size() > 0 && (op.operator_type() != GpuOperatorType::Simple))
{
return false;
}
// Constraint 4
- if(op.operator_type() != GpuOperatorType::Unfusable && op.tensors().get_const_dst_tensors().size() != 1U)
+ if (op.operator_type() != GpuOperatorType::Unfusable && op.tensors().get_const_dst_tensors().size() != 1U)
{
return false;
}
// Constraint 5
- if(_operators.size() > 0)
+ if (_operators.size() > 0)
{
const auto root_dst_tensors = get_root_operator()->tensors().get_const_dst_tensors();
ARM_COMPUTE_ERROR_ON(root_dst_tensors.empty());
const auto first_dst_tensor = root_dst_tensors[0];
const auto dst_tensors = op.tensors().get_const_dst_tensors();
- for(const auto &t : root_dst_tensors)
+ for (const auto &t : root_dst_tensors)
{
- if(detail::have_different_dimensions(t->tensor_shape(), first_dst_tensor->tensor_shape(), 0))
+ if (detail::have_different_dimensions(t->tensor_shape(), first_dst_tensor->tensor_shape(), 0))
{
return false;
}
}
- for(const auto &t : dst_tensors)
+ for (const auto &t : dst_tensors)
{
- if(detail::have_different_dimensions(t->tensor_shape(), first_dst_tensor->tensor_shape(), 0))
+ if (detail::have_different_dimensions(t->tensor_shape(), first_dst_tensor->tensor_shape(), 0))
{
return false;
}
}
}
// Constraint 6
- if(_operators.size() > 0)
+ if (_operators.size() > 0)
{
const auto root_dst_tensors = get_root_operator()->tensors().get_const_dst_tensors();
ARM_COMPUTE_ERROR_ON(root_dst_tensors.empty());
const auto first_dst_tensor_layout = root_dst_tensors[0]->data_layout();
const auto dst_tensors = op.tensors().get_const_dst_tensors();
- for(const auto &t : root_dst_tensors)
+ for (const auto &t : root_dst_tensors)
{
- if(t->data_layout() != first_dst_tensor_layout)
+ if (t->data_layout() != first_dst_tensor_layout)
{
return false;
}
}
- for(const auto &t : dst_tensors)
+ for (const auto &t : dst_tensors)
{
- if(t->data_layout() != first_dst_tensor_layout)
+ if (t->data_layout() != first_dst_tensor_layout)
{
return false;
}
@@ -151,16 +146,17 @@ void GpuOperatorGroup::add_operator(const Operator &op, bool is_output)
_graph.add_operator_as_linear(op.id(), src_tensor_ids, dst_tensor_ids, is_output);
_operators[op.id()] = op;
}
-Operator GpuOperatorGroup::new_operator(const GpuOperatorType &operator_type, const ArgumentPack<ITensorInfo> &tensors) const
+Operator GpuOperatorGroup::new_operator(const GpuOperatorType &operator_type,
+ const ArgumentPack<ITensorInfo> &tensors) const
{
auto new_id = static_cast<OperatorId>(_operators.size());
- return Operator{ new_id, operator_type, tensors };
+ return Operator{new_id, operator_type, tensors};
}
const Operator *GpuOperatorGroup::get_root_operator() const
{
const auto roots = _graph.get_root_ops();
ARM_COMPUTE_ERROR_ON(roots.size() > 1);
- if(roots.empty())
+ if (roots.empty())
{
return nullptr;
}
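Most of the churn in this file comes down to two purely mechanical spacing rules: control-flow keywords now take a space before their parenthesis (if (, for (, while (, switch (), and braced initializers lose their interior padding ({x} rather than { x }). Short lambdas, as in the get_tensor_ids hunk, may also stay on one line inside an argument list. A minimal sketch with stand-in types:

    #include <vector>

    struct Operator
    {
        int id{-1}; // default member initializer: {-1}, no interior spaces
    };

    int first_positive_id(const std::vector<Operator> &ops)
    {
        for (const auto &op : ops) // space after 'for'
        {
            if (op.id > 0) // space after 'if'
            {
                return op.id;
            }
        }
        return -1;
    }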
diff --git a/src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.h b/src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.h
index 308a9d796a..0a2369d357 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.h
@@ -25,9 +25,11 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUOPERATORGROUP
#include "arm_compute/core/ITensorInfo.h"
+
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
#include "src/dynamic_fusion/sketch/gpu/GpuOperatorProperties.h"
#include "src/dynamic_fusion/sketch/utils/DependencyGraph.h"
+
#include <map>
namespace arm_compute
@@ -104,7 +106,7 @@ public:
const Operator *get_root_operator() const;
private:
- DependencyGraph _graph{};
+ DependencyGraph _graph{};
std::map<OperatorId, Operator> _operators{};
};
} // namespace dynamic_fusion
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp
index c2bd012703..36cad790c7 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContext.cpp
@@ -23,7 +23,9 @@
*/
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h"
+
#include "arm_compute/core/CL/CLCompileContext.h"
+
#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h"
namespace arm_compute
@@ -33,7 +35,7 @@ namespace experimental
namespace dynamic_fusion
{
GpuWorkloadContext::GpuWorkloadContext(CLCompileContext *cl_compile_ctx)
- : _impl{ std::make_unique<Impl>(GpuLanguage::OpenCL, cl_compile_ctx) }
+ : _impl{std::make_unique<Impl>(GpuLanguage::OpenCL, cl_compile_ctx)}
{
}
@@ -74,7 +76,11 @@ const GpuWorkloadContext::Impl &GpuWorkloadContext::implementation() const
}
GpuWorkloadContext::Impl::Impl(GpuLanguage gpu_language, CLCompileContext *cl_compile_ctx)
- : _gpu_language(gpu_language), _cl_compile_ctx(cl_compile_ctx), _next_tensor_id(1), _mem_map(), _managed_tensor_info()
+ : _gpu_language(gpu_language),
+ _cl_compile_ctx(cl_compile_ctx),
+ _next_tensor_id(1),
+ _mem_map(),
+ _managed_tensor_info()
{
}
@@ -100,7 +106,7 @@ void GpuWorkloadContext::Impl::register_user_tensor(ITensorInfo &tensor_info)
const auto tensor_id = next_tensor_id();
tensor_info.set_id(tensor_id);
- _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::User };
+ _mem_map[tensor_id] = MemoryDescriptor{MemoryType::User};
// Save a *copy* of the user tensor info in workload context for future reference
// Note that this means if the user modifies the @p tensor_info, the change will not be reflected in the context
_managed_tensor_info.emplace(tensor_info.id(), std::make_unique<TensorInfo>(tensor_info));
@@ -111,7 +117,7 @@ ITensorInfo *GpuWorkloadContext::Impl::create_virtual_tensor()
auto tensor_info = std::make_unique<TensorInfo>();
const auto tensor_id = -next_tensor_id();
tensor_info->set_id(tensor_id);
- _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Virtual };
+ _mem_map[tensor_id] = MemoryDescriptor{MemoryType::Virtual};
auto inserted = _managed_tensor_info.emplace(tensor_info->id(), std::move(tensor_info));
return inserted.first->second.get();
}
@@ -121,7 +127,7 @@ ITensorInfo *GpuWorkloadContext::Impl::create_auxiliary_tensor(const ITensorInfo
auto tensor_info = std::make_unique<TensorInfo>(itensor_info);
const auto tensor_id = next_tensor_id();
tensor_info->set_id(tensor_id);
- _mem_map[tensor_id] = MemoryDescriptor{ MemoryType::Auxiliary, AuxMemoryInfo{ tensor_info->total_size() } };
+ _mem_map[tensor_id] = MemoryDescriptor{MemoryType::Auxiliary, AuxMemoryInfo{tensor_info->total_size()}};
auto inserted = _managed_tensor_info.emplace(tensor_info->id(), std::move(tensor_info));
return inserted.first->second.get();
}
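Constructor initializer lists follow the same fit-or-split rule: the Impl constructor above overflows the limit, so each member initializer gets its own line under the colon, while short lists (see the GpuWorkloadSketch constructor below) stay packed on a single line. Sketched on stand-in members:

    struct Wide
    {
        Wide(int language, void *ctx)
            : _gpu_language(language),
              _cl_compile_ctx(ctx),
              _next_tensor_id(1)
        {
        }
        int   _gpu_language;
        void *_cl_compile_ctx;
        int   _next_tensor_id;
    };

    struct Narrow
    {
        explicit Narrow(int v) : _v{v} // fits, so the list stays on one line
        {
        }
        int _v;
    };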
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h
index c169476a70..7d9699031f 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h
@@ -27,8 +27,8 @@
#include "arm_compute/core/CL/CLCompileContext.h"
#include "arm_compute/core/ITensorInfo.h"
-#include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h"
+#include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h"
namespace arm_compute
{
@@ -93,8 +93,8 @@ private:
GpuLanguage _gpu_language;
CLCompileContext *_cl_compile_ctx;
- ITensorInfo::Id _next_tensor_id;
- MemoryDescriptorMap _mem_map;
+ ITensorInfo::Id _next_tensor_id;
+ MemoryDescriptorMap _mem_map;
std::map<ITensorInfo::Id, std::unique_ptr<TensorInfo>> _managed_tensor_info;
};
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp
index d3a20c0dfe..973f7c747f 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.cpp
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+
#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
namespace arm_compute
@@ -30,8 +31,7 @@ namespace experimental
{
namespace dynamic_fusion
{
-GpuWorkloadSketch::GpuWorkloadSketch(Context *context)
- : _impl{ std::make_unique<Implementation>(context) }
+GpuWorkloadSketch::GpuWorkloadSketch(Context *context) : _impl{std::make_unique<Implementation>(context)}
{
}
GpuWorkloadSketch::~GpuWorkloadSketch()
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h
index d3033898e9..fea4fe9577 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h
@@ -24,8 +24,9 @@
#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSKETCHIMPL
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_GPUWORKLOADSKETCHIMPL
-#include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
+#include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h"
+
#include "src/dynamic_fusion/sketch/gpu/GpuComponentServices.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGraph.h"
#include "src/dynamic_fusion/sketch/gpu/GpuOperatorGroup.h"
@@ -45,12 +46,8 @@ public:
*
* @param[in] context global workload creation context
*/
- explicit Implementation(
- Context *context)
- : _context{ context },
- _comp_services{},
- _component_graph{ _context, &_comp_services },
- _operator_group{}
+ explicit Implementation(Context *context)
+ : _context{context}, _comp_services{}, _component_graph{_context, &_comp_services}, _operator_group{}
{
}
/** Prevent instances of this class from being copy constructed */
diff --git a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h
index 578366daaf..43bcc47fa0 100644
--- a/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h
+++ b/src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h
@@ -26,6 +26,7 @@
#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/dynamic_fusion/sketch/MemoryDescriptor.h"
+
#include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h"
#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadContextImpl.h"
@@ -45,7 +46,7 @@ namespace
*/
GpuKernelArgumentList extract_kernel_args_for_one_tensor(GpuKernelArgumentList &flat_kernel_args)
{
- if(flat_kernel_args.empty())
+ if (flat_kernel_args.empty())
{
return {};
}
@@ -56,10 +57,10 @@ GpuKernelArgumentList extract_kernel_args_for_one_tensor(GpuKernelArgumentList &
flat_kernel_args.pop_front();
const auto tensor_id = karg_head.id();
- while(!flat_kernel_args.empty())
+ while (!flat_kernel_args.empty())
{
const GpuKernelArgumentBinding &karg = flat_kernel_args.front();
- if(karg.id() != tensor_id) // Encountered the next tensor; return the current tensor's kernel arguments
+ if (karg.id() != tensor_id) // Encountered the next tensor; return the current tensor's kernel arguments
{
return tensor_kargs;
}
@@ -68,7 +69,7 @@ GpuKernelArgumentList extract_kernel_args_for_one_tensor(GpuKernelArgumentList &
}
return tensor_kargs;
}
-}
+} // namespace
#endif // ACL_INTERNAL_TEST_CKW_IN_DF
/** Uniquely identifies a @ref GpuUnitWorkload within a @ref GpuWorkloadSourceCode */
using UnitWorkloadId = int32_t;
@@ -92,9 +93,7 @@ public:
GpuWorkloadArgument(const ITensorInfo &tensor_info,
const MemoryDescriptor &mem_desc,
const GpuKernelArgumentInfo &kernel_arg_info)
- : _tensor_info{ tensor_info },
- _mem_desc{ mem_desc },
- _kernel_arg_info{ kernel_arg_info }
+ : _tensor_info{tensor_info}, _mem_desc{mem_desc}, _kernel_arg_info{kernel_arg_info}
{
}
#else // ACL_INTERNAL_TEST_CKW_IN_DF
@@ -107,9 +106,7 @@ public:
GpuWorkloadArgument(const ITensorInfo &tensor_info,
const MemoryDescriptor &mem_desc,
const GpuKernelArgumentList &kernel_args)
- : _tensor_info{ tensor_info },
- _mem_desc{ mem_desc },
- _kernel_args{ kernel_args }
+ : _tensor_info{tensor_info}, _mem_desc{mem_desc}, _kernel_args{kernel_args}
{
}
#endif // ACL_INTERNAL_TEST_CKW_IN_DF
@@ -175,9 +172,9 @@ private:
TensorInfo _tensor_info{};
MemoryDescriptor _mem_desc{};
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- GpuKernelArgumentInfo _kernel_arg_info {};
+ GpuKernelArgumentInfo _kernel_arg_info{};
#else // ACL_INTERNAL_TEST_CKW_IN_DF
- GpuKernelArgumentList _kernel_args {};
+ GpuKernelArgumentList _kernel_args{};
#endif // ACL_INTERNAL_TEST_CKW_IN_DF
};
@@ -190,7 +187,7 @@ struct UnitWorkloadStage
Prepare, /**< Only run once at the beginning. */
Run, /**< Run every time after the first time. */
};
- Stage stage{ Stage::Run };
+ Stage stage{Stage::Run};
};
inline bool operator==(const UnitWorkloadStage &stage0, const UnitWorkloadStage &stage1)
@@ -212,7 +209,7 @@ public:
* @param[in] stage Stage of the unit workload
*/
GpuUnitWorkload(UnitWorkloadId id, const GpuKernelSourceCode &kernel_code, const UnitWorkloadStage &stage)
- : _id{ id }, _kernel_code{ kernel_code }, _stage{ stage }
+ : _id{id}, _kernel_code{kernel_code}, _stage{stage}
{
}
/** Get the id of the unit workload */
@@ -253,7 +250,10 @@ public:
*
* @return UnitWorkloadId Allocated unit workload id
*/
- UnitWorkloadId add_unit_workload(const GpuKernelSourceCode &kernel_code, const UnitWorkloadStage &stage, const MemoryDescriptorMap &mem_map, const GpuWorkloadContext *context)
+ UnitWorkloadId add_unit_workload(const GpuKernelSourceCode &kernel_code,
+ const UnitWorkloadStage &stage,
+ const MemoryDescriptorMap &mem_map,
+ const GpuWorkloadContext *context)
{
// Use the size of the kernel codes as Id
const auto uwk_id = static_cast<UnitWorkloadId>(_unit_workloads.size());
@@ -262,12 +262,13 @@ public:
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
ARM_COMPUTE_UNUSED(context);
// Assemble kernel argument with memory descriptor to form workload argument
- for(const auto &id_arg : kernel_code.arguments())
+ for (const auto &id_arg : kernel_code.arguments())
{
- const auto arg_id = id_arg.first;
- const auto arg = id_arg.second;
- _workload_arguments[arg_id] = GpuWorkloadArgument{ *arg.tensor_info(), mem_map.at(arg_id), *arg.kernel_argument_info() };
- if(_tensor_uwork_map.find(arg_id) == _tensor_uwork_map.end())
+ const auto arg_id = id_arg.first;
+ const auto arg = id_arg.second;
+ _workload_arguments[arg_id] =
+ GpuWorkloadArgument{*arg.tensor_info(), mem_map.at(arg_id), *arg.kernel_argument_info()};
+ if (_tensor_uwork_map.find(arg_id) == _tensor_uwork_map.end())
{
_tensor_uwork_map[arg_id] = std::set<UnitWorkloadId>();
}
@@ -276,18 +277,19 @@ public:
#else // ACL_INTERNAL_TEST_CKW_IN_DF
GpuKernelArgumentList flat_kernel_args = kernel_code.arguments();
GpuKernelArgumentList tensor_kargs{};
- while(true)
+ while (true)
{
tensor_kargs = extract_kernel_args_for_one_tensor(flat_kernel_args);
- if(tensor_kargs.empty())
+ if (tensor_kargs.empty())
{
break;
}
else
{
const auto tensor_id = tensor_kargs.at(0).id();
- _workload_arguments[tensor_id] = GpuWorkloadArgument{ *context->implementation().get_tensor_info(tensor_id), mem_map.at(tensor_id), tensor_kargs };
- if(_tensor_uwork_map.find(tensor_id) == _tensor_uwork_map.end())
+ _workload_arguments[tensor_id] = GpuWorkloadArgument{
+ *context->implementation().get_tensor_info(tensor_id), mem_map.at(tensor_id), tensor_kargs};
+ if (_tensor_uwork_map.find(tensor_id) == _tensor_uwork_map.end())
{
_tensor_uwork_map[tensor_id] = std::set<UnitWorkloadId>();
}
@@ -308,7 +310,7 @@ public:
{
std::vector<UnitWorkloadId> ids{};
- for(const auto &uwk : _unit_workloads)
+ for (const auto &uwk : _unit_workloads)
{
ids.push_back(uwk.id());
}
@@ -323,7 +325,7 @@ public:
std::vector<ITensorInfo::Id> tensors() const
{
std::vector<ITensorInfo::Id> ids{};
- for(const auto &id_tensor : _workload_arguments)
+ for (const auto &id_tensor : _workload_arguments)
{
ids.push_back(id_tensor.first);
}
@@ -337,7 +339,7 @@ public:
}
private:
- std::vector<GpuUnitWorkload> _unit_workloads{};
+ std::vector<GpuUnitWorkload> _unit_workloads{};
std::map<ITensorInfo::Id, GpuWorkloadArgument> _workload_arguments{};
std::map<ITensorInfo::Id, std::set<UnitWorkloadId>> _tensor_uwork_map{};
};
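Two smaller rules are visible in this header: consecutive member declarations are column-aligned on their names (the _unit_workloads hunk just above), and every namespace must now close with a "} // namespace" comment (the hunk earlier in this file that turns a bare "}" into "} // namespace"). A stand-in:

    #include <map>
    #include <set>
    #include <vector>

    namespace
    {
    struct Workloads
    {
        // Names and trailing initializers are aligned across the block:
        std::vector<int>             _unit_workloads{};
        std::map<int, int>           _workload_arguments{};
        std::map<int, std::set<int>> _tensor_uwork_map{};
    };
    } // namespace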
diff --git a/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h b/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h
index 1d8b231efd..ad474674f9 100644
--- a/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h
+++ b/src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h
@@ -26,6 +26,7 @@
#include "arm_compute/core/CL/CLCompileContext.h"
#include "arm_compute/core/Window.h"
+
#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelSourceCode.h"
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp
index 4b4c22fa1d..c4ab110c92 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp
@@ -23,6 +23,7 @@
*/
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h"
+
#include "ckw/Error.h"
namespace arm_compute
@@ -36,12 +37,12 @@ GpuCkwComponentArgument::GpuCkwComponentArgument()
{
}
-GpuCkwComponentArgument::GpuCkwComponentArgument(ckw::TensorOperand &tensor)
- : _tensor(&tensor)
+GpuCkwComponentArgument::GpuCkwComponentArgument(ckw::TensorOperand &tensor) : _tensor(&tensor)
{
}
-GpuCkwComponentArgument &GpuCkwComponentArgument::init_virtual_tensor(ckw::TileOperand &tile, const ckw::TensorTileSampler &tile_sampler)
+GpuCkwComponentArgument &GpuCkwComponentArgument::init_virtual_tensor(ckw::TileOperand &tile,
+ const ckw::TensorTileSampler &tile_sampler)
{
CKW_ASSERT(_tile == nullptr);
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h
index 80f91389a0..863989a7bd 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h
@@ -110,9 +110,9 @@ public:
const ckw::TensorTileSampler &tile_sampler() const;
private:
- ckw::TensorOperand *_tensor{ nullptr };
- ckw::TileOperand *_tile{ nullptr };
- ckw::TensorTileSampler _tile_sampler{};
+ ckw::TensorOperand *_tensor{nullptr};
+ ckw::TileOperand *_tile{nullptr};
+ ckw::TensorTileSampler _tile_sampler{};
};
} // namespace dynamic_fusion
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp
index a24a172d77..c927f32bde 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp
@@ -23,17 +23,16 @@
*/
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h"
-#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
-
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Window.h"
+
#include "src/common/utils/Log.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
-
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h"
+#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
using namespace ckw;
namespace arm_compute
@@ -43,11 +42,11 @@ namespace experimental
namespace dynamic_fusion
{
GpuCkwDriver::GpuCkwDriver(const GpuKernelComponentGroup &components)
- : _components{ components }, _kernel{ GpuTargetLanguage::OpenCL }, _code{}
+ : _components{components}, _kernel{GpuTargetLanguage::OpenCL}, _code{}
{
// Generate kernel name
std::string name = "";
- for(auto &comp : _components)
+ for (auto &comp : _components)
{
auto ckw_driver = comp->ckw_component_driver();
ARM_COMPUTE_ERROR_ON(ckw_driver == nullptr);
@@ -60,7 +59,7 @@ GpuCkwDriver::GpuCkwDriver(const GpuKernelComponentGroup &components)
GpuCkwScopedKernelWriter writer(&root_writer);
GpuCkwVariableTable vtable{};
- for(auto &comp : _components)
+ for (auto &comp : _components)
{
auto ckw_driver = comp->ckw_component_driver();
ARM_COMPUTE_ERROR_ON(ckw_driver == nullptr);
@@ -82,7 +81,7 @@ std::string GpuCkwDriver::get_code()
std::string GpuCkwDriver::get_config_id()
{
std::string id = "";
- for(auto &comp : _components)
+ for (auto &comp : _components)
{
auto ckw_driver = comp->ckw_component_driver();
ARM_COMPUTE_ERROR_ON(ckw_driver == nullptr);
@@ -101,9 +100,9 @@ Window GpuCkwDriver::get_window() const
GpuKernelArgumentList GpuCkwDriver::get_kernel_arguments()
{
GpuKernelArgumentList args{};
- for(const auto &arg : _kernel.arguments())
+ for (const auto &arg : _kernel.arguments())
{
- switch(arg.type())
+ switch (arg.type())
{
case KernelArgument::Type::TensorStorage:
{
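The include reshuffles in this file (and in most of the .cpp hunks that follow) come from one sorting rule: within each block, paths are ordered case-insensitively. That is why ckw_driver/components/utils/type_converter/Common.h ('c') now precedes GpuCkwKernelWriter.h ('G'), and why utils/helpers/AdjustVecSize.h sorts before Validate.h in the component files below. A stand-in translation unit recording the observed order (the rule is inferred from the hunks; the configuration file is not part of this delivery):

    #include <algorithm> // standard headers obey the same rule within a block
    #include <map>
    #include <string>

    // Case-insensitive order from the hunk above ('c' sorts before 'G'):
    //   src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h
    //   src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h
    //   src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h
    //   src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h
    //   src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h
    //   src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h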
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h
index 19db575fea..2ca5fb435c 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h
@@ -24,12 +24,12 @@
#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWDRIVER
#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWDRIVER
+#include "ckw/Kernel.h"
+
#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h"
-#include "ckw/Kernel.h"
-
#include <map>
#include <string>
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.cpp
index ca4f121566..5f8ce919e3 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.cpp
@@ -23,10 +23,12 @@
*/
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h"
+
#include "ckw/Error.h"
#include "ckw/TileInfo.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h"
+
namespace arm_compute
{
namespace experimental
@@ -34,21 +36,21 @@ namespace experimental
namespace dynamic_fusion
{
-GpuCkwKernelWriter::GpuCkwKernelWriter(ckw::Kernel &kernel)
- : KernelWriter(kernel)
+GpuCkwKernelWriter::GpuCkwKernelWriter(ckw::Kernel &kernel) : KernelWriter(kernel)
{
}
void GpuCkwKernelWriter::op_load_once(GpuCkwComponentArgument *tensor_or_tile, const ckw::TensorTileSampler &sampler)
{
- if(!tensor_or_tile->has_tile())
+ if (!tensor_or_tile->has_tile())
{
CKW_ASSERT(tensor_or_tile->has_tensor());
auto &tensor = tensor_or_tile->tensor();
const auto tile_name = tensor.name() + "_tile";
- auto &tile = declare_tile(tile_name.c_str(), ckw::TileInfo(tensor.data_type(), sampler.height(), sampler.width()));
+ auto &tile =
+ declare_tile(tile_name.c_str(), ckw::TileInfo(tensor.data_type(), sampler.height(), sampler.width()));
op_load(tile, tensor, sampler);
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp
index 043fda9e6f..cbadbd9639 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp
@@ -23,6 +23,7 @@
*/
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
+
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h
index 4d11b5e3e4..81049bfe37 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h
@@ -63,7 +63,7 @@ public:
private:
GpuCkwKernelWriter *_writer;
- int32_t _parent_id_space;
+ int32_t _parent_id_space;
};
} // namespace dynamic_fusion
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp
index 37c27cd116..88a0cf7f43 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp
@@ -23,11 +23,12 @@
*/
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+
#include <sstream>
namespace arm_compute
@@ -36,19 +37,22 @@ namespace experimental
{
namespace dynamic_fusion
{
-GpuCkwComponentArgument *GpuCkwVariableTable::declare_variable(const GpuKernelComponentGroup &comp_group, GpuCkwScopedKernelWriter &writer, const ITensorInfo *tensor, TensorStorageType storage,
- const std::string &alias)
+GpuCkwComponentArgument *GpuCkwVariableTable::declare_variable(const GpuKernelComponentGroup &comp_group,
+ GpuCkwScopedKernelWriter &writer,
+ const ITensorInfo *tensor,
+ TensorStorageType storage,
+ const std::string &alias)
{
ARM_COMPUTE_ERROR_ON_MSG(!tensor->has_valid_id(), "Tensor info with valid id expected");
// Do not re-declare if the variable associated with the tensor has already been declared
auto it = _vars.find(tensor->id());
- if(it != _vars.end())
+ if (it != _vars.end())
{
return &it->second;
}
- if(comp_group.is_intermediate_tensor(tensor))
+ if (comp_group.is_intermediate_tensor(tensor))
{
// Create a virtual tensor variable
GpuCkwComponentArgument var;
@@ -61,7 +65,7 @@ GpuCkwComponentArgument *GpuCkwVariableTable::declare_variable(const GpuKernelCo
std::stringstream ss;
ss << alias << "_t" << abs(tensor->id());
const auto uniq_name = ss.str();
- GpuCkwComponentArgument var{ writer->declare_tensor_argument(uniq_name, to_ckw(*tensor), to_ckw(storage)) };
+ GpuCkwComponentArgument var{writer->declare_tensor_argument(uniq_name, to_ckw(*tensor), to_ckw(storage))};
auto &&inserted = _vars.emplace(tensor->id(), var);
return &(inserted.first->second);
}
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h
index 0649dcba9d..2b118911b8 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h
@@ -25,6 +25,7 @@
#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWVARIABLETABLE
#include "arm_compute/core/ITensorInfo.h"
+
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h"
#include <map>
@@ -58,8 +59,11 @@ public:
*
* @return GpuCkwComponentArgument*
*/
- GpuCkwComponentArgument *declare_variable(const GpuKernelComponentGroup &comp_group, GpuCkwScopedKernelWriter &writer, const ITensorInfo *tensor, TensorStorageType storage,
- const std::string &alias = "unnamed");
+ GpuCkwComponentArgument *declare_variable(const GpuKernelComponentGroup &comp_group,
+ GpuCkwScopedKernelWriter &writer,
+ const ITensorInfo *tensor,
+ TensorStorageType storage,
+ const std::string &alias = "unnamed");
private:
std::map<ITensorInfo::Id, GpuCkwComponentArgument> _vars{};
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h
index 14086f785e..52e56e2e35 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h
@@ -25,6 +25,7 @@
#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_IGPUCKWCOMPONENTDRIVER
#include "arm_compute/core/Window.h"
+
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
#include "src/dynamic_fusion/sketch/gpu/components/Types.h"
@@ -73,8 +74,7 @@ public:
* @param[in] id Component id
* @param[in] tensors Tensor arguments to the components
*/
- IGpuCkwComponentDriver(ComponentId id, const ArgumentPack<ITensorInfo> &tensors)
- : _id{ id }, _tensors{ tensors }
+ IGpuCkwComponentDriver(ComponentId id, const ArgumentPack<ITensorInfo> &tensors) : _id{id}, _tensors{tensors}
{
}
/** Destructor */
@@ -89,7 +89,9 @@ public:
*
* @note @p writer can only be passed by value since the new scope is created in the copy constructor
*/
- virtual void write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const = 0;
+ virtual void write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const = 0;
/** Get tensor arguments */
ArgumentPack<ITensorInfo> tensors() const
{
@@ -128,7 +130,7 @@ public:
}
private:
- ComponentId _id{ -1 };
+ ComponentId _id{-1};
ArgumentPack<ITensorInfo> _tensors{};
};
} // namespace dynamic_fusion
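The @note above is worth unpacking: GpuCkwScopedKernelWriter opens a new id space in its copy constructor (see its _parent_id_space member earlier in this diff), so passing it by value is what gives each component's write_component_code its own scope. A behavioural sketch with stand-in types, not the real ckw API:

    #include <cstdint>

    struct ScopedWriter
    {
        ScopedWriter() = default;
        // Copying is what opens a new scope: the copy remembers its parent's
        // id space and moves to a fresh one.
        ScopedWriter(const ScopedWriter &other)
            : _parent_id_space(other._id_space), _id_space(other._id_space + 1)
        {
        }
        int32_t _parent_id_space{0};
        int32_t _id_space{1};
    };

    void write_component_code(ScopedWriter writer) // by value: the copy ctor runs on entry
    {
        // ... everything emitted here lives in writer._id_space ...
    }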
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp
index c07fac0e0d..c3b1b3c8bc 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp
@@ -24,16 +24,18 @@
#include "GpuCkwActivation.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
+#include "arm_compute/core/Validate.h"
#include "ckw/TensorTileSampler.h"
+
#include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+
#include <string>
using namespace ckw;
@@ -87,24 +89,25 @@ inline TensorTileSampler create_sampler(GpuCkwScopedKernelWriter &writer, int32_
GpuCkwActivation::GpuCkwActivation(ComponentId id,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes)
- : IGpuCkwComponentDriver{ id, tensors },
- _src{},
- _dst{},
- _attributes{ attributes }
+ : IGpuCkwComponentDriver{id, tensors}, _src{}, _dst{}, _attributes{attributes}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
}
-void GpuCkwActivation::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
+void GpuCkwActivation::write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const
{
const auto root_window = comp_group.get_root_component()->ckw_component_driver()->get_window();
const unsigned int n0 = root_window.x().step();
const unsigned int m0 = root_window.y().step();
- GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
- GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
+ GpuCkwComponentArgument *src =
+ vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
+ GpuCkwComponentArgument *dst =
+ vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
load_src_dst_tiles_and_prepare_sampler(writer, src, dst, m0, n0, create_sampler);
@@ -119,7 +122,7 @@ void GpuCkwActivation::write_component_code(const ComponentGroup &comp_group, Gp
const auto &constant_B = writer->declare_tile("B_VAL", _attributes.b());
// Perform the operation.
- switch(_attributes.activation())
+ switch (_attributes.activation())
{
case ActivationLayerInfo::ActivationFunction::LOGISTIC:
{
@@ -179,9 +182,10 @@ Window GpuCkwActivation::get_window() const
// Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged
// This is in line with the collapsing convention used by operators like Conv2d
output_shape.collapse(2U, 1U);
- constexpr unsigned int vector_size_byte_opencl = 16;
- const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
- Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
+ constexpr unsigned int vector_size_byte_opencl = 16;
+ const unsigned int num_elems_processed_per_iteration =
+ adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
+ Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
return win;
}
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.h
index e157e36cbf..386e933a72 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.h
@@ -46,15 +46,15 @@ public:
* @param[in] tensors Tensor arguments to the component
* @param[in] attributes Component attributes
*/
- GpuCkwActivation(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes);
+ GpuCkwActivation(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(GpuCkwActivation);
/** Destructor */
~GpuCkwActivation() override = default;
// Inherited methods overridden:
- virtual void write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const override;
- Window get_window() const override;
+ virtual void write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const override;
+ Window get_window() const override;
private:
const ITensorInfo *_src;
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp
index 6ecf2bac44..e8e5087633 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp
@@ -24,16 +24,18 @@
#include "GpuCkwCast.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
+#include "arm_compute/core/Validate.h"
#include "ckw/TensorTileSampler.h"
+
#include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+
#include <string>
using namespace ckw;
@@ -84,30 +86,29 @@ inline TensorTileSampler create_sampler(GpuCkwScopedKernelWriter &writer, int32_
}
} // namespace
-GpuCkwCast::GpuCkwCast(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
- : IGpuCkwComponentDriver{ id, tensors },
- _src{},
- _dst{},
- _attributes{ attributes }
+GpuCkwCast::GpuCkwCast(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes)
+ : IGpuCkwComponentDriver{id, tensors}, _src{}, _dst{}, _attributes{attributes}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
}
-void GpuCkwCast::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
+void GpuCkwCast::write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const
{
const auto root_window = comp_group.get_root_component()->ckw_component_driver()->get_window();
const unsigned int n0 = root_window.x().step();
const unsigned int m0 = root_window.y().step();
- GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
- GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
+ GpuCkwComponentArgument *src =
+ vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
+ GpuCkwComponentArgument *dst =
+ vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
// Load the source tile and prepare the sampler.
- if(!src->has_tile())
+ if (!src->has_tile())
{
const auto sampler = create_sampler(writer, m0, n0);
writer->op_load_once(src, sampler);
@@ -122,7 +123,7 @@ void GpuCkwCast::write_component_code(const ComponentGroup &comp_group, GpuCkwVa
const auto &sampler = src->tile_sampler();
// Prepare the output tile.
- if(!dst->has_tile())
+ if (!dst->has_tile())
{
// Get Target datatype and convert it to ckw::DataType.
ckw::DataType target_dt = dynamic_fusion::to_ckw(_attributes.data_type());
@@ -143,7 +144,7 @@ void GpuCkwCast::write_component_code(const ComponentGroup &comp_group, GpuCkwVa
const size_t dst_size = data_size_from_type(_dst->data_type());
const bool cast_down = (src_size >= dst_size);
- if(cast_down && is_data_type_quantized(_src->data_type()))
+ if (cast_down && is_data_type_quantized(_src->data_type()))
{
const auto &constant_x80 = writer->declare_tile("0x80", 0x80);
writer->op_binary_expression(src_tile, src_tile, BinaryOp::BitwiseXOR, constant_x80);
@@ -151,7 +152,7 @@ void GpuCkwCast::write_component_code(const ComponentGroup &comp_group, GpuCkwVa
ckw::ConvertPolicy convert_policy = ckw::ConvertPolicy::None;
- if(cast_down && (is_data_type_float(_src->data_type()) || _attributes.convert_policy() == ConvertPolicy::SATURATE))
+ if (cast_down && (is_data_type_float(_src->data_type()) || _attributes.convert_policy() == ConvertPolicy::SATURATE))
{
convert_policy = ckw::ConvertPolicy::Saturate;
}
@@ -167,9 +168,10 @@ Window GpuCkwCast::get_window() const
// Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged
// This is in line with the collapsing convention used by operators like Conv2d
output_shape.collapse(2U, 1U);
- constexpr unsigned int vector_size_byte_opencl = 16;
- const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
- Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
+ constexpr unsigned int vector_size_byte_opencl = 16;
+ const unsigned int num_elems_processed_per_iteration =
+ adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
+ Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
return win;
}
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h
index 821cec1e19..2389301196 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h
@@ -46,15 +46,15 @@ public:
* @param[in] tensors Tensor arguments to the component
* @param[in] attributes Component attributes
*/
- GpuCkwCast(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes);
+ GpuCkwCast(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(GpuCkwCast);
/** Destructor */
~GpuCkwCast() override = default;
// Inherited methods overridden:
- virtual void write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const override;
- Window get_window() const override;
+ virtual void write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const override;
+ Window get_window() const override;
private:
const ITensorInfo *_src;
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.cpp
index 3c906646a6..7833da2334 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.cpp
@@ -25,21 +25,20 @@
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/StringUtils.h"
-
+#include "arm_compute/core/Validate.h"
#include "ckw/TensorTileSampler.h"
#include "ckw/TileInfo.h"
#include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
namespace arm_compute
{
@@ -54,13 +53,7 @@ GpuCkwDirectConv2d::GpuCkwDirectConv2d(ComponentId id,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes,
const Settings &settings)
- : IGpuCkwComponentDriver{ id, tensors },
- _src{},
- _wei{},
- _bia{},
- _dst{},
- _attributes{ attributes },
- _settings{ settings }
+ : IGpuCkwComponentDriver{id, tensors}, _src{}, _wei{}, _bia{}, _dst{}, _attributes{attributes}, _settings{settings}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_wei = this->tensors().get_const_tensor(TensorType::ACL_SRC_1);
@@ -69,7 +62,9 @@ GpuCkwDirectConv2d::GpuCkwDirectConv2d(ComponentId id,
ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _wei, _dst); // Bias can be null
}
-void GpuCkwDirectConv2d::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
+void GpuCkwDirectConv2d::write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const
{
const auto desc = _settings.direct_conv_descriptor();
ARM_COMPUTE_ERROR_ON_MSG(desc.export_input_to_cl_image || desc.export_output_to_cl_image,
@@ -99,15 +94,18 @@ void GpuCkwDirectConv2d::write_component_code(const ComponentGroup &comp_group,
// extra loop to compute the left-over elements.
const bool use_cl_image_for_weights = desc.export_weights_to_cl_image && (k0 == 4) && (K % 4 == 0);
- GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
+ GpuCkwComponentArgument *src =
+ vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
GpuCkwComponentArgument *wei = vtable.declare_variable(
- comp_group, writer, _wei, use_cl_image_for_weights ? TensorStorageType::ClImage2dReadOnly : TensorStorageType::ClBufferUint8Ptr, "wei");
- GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
+ comp_group, writer, _wei,
+ use_cl_image_for_weights ? TensorStorageType::ClImage2dReadOnly : TensorStorageType::ClBufferUint8Ptr, "wei");
+ GpuCkwComponentArgument *dst =
+ vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
GpuCkwComponentArgument *bia = nullptr;
const bool using_bias = _bia != nullptr;
- if(using_bias)
+ if (using_bias)
{
bia = vtable.declare_variable(comp_group, writer, _bia, TensorStorageType::ClBufferUint8Ptr, "bia");
}
@@ -154,7 +152,8 @@ void GpuCkwDirectConv2d::write_component_code(const ComponentGroup &comp_group,
src_sampler.address_mode_x(TensorSamplerAddressModeX::None);
// We cannot have out-of-bounds reads when the kernel height is equal to 1. Otherwise, we need to ensure the
// indirection buffer mi does not contain negative values representing out-of-bounds reads.
- src_sampler.address_mode_y(kernel_height == 1 ? TensorSamplerAddressModeY::None : TensorSamplerAddressModeY::SkipMinEdgeOnly);
+ src_sampler.address_mode_y(kernel_height == 1 ? TensorSamplerAddressModeY::None
+ : TensorSamplerAddressModeY::SkipMinEdgeOnly);
src_sampler.address_mode_z(TensorSamplerAddressModeZ::None);
TensorTileSampler wei_sampler;
@@ -178,7 +177,7 @@ void GpuCkwDirectConv2d::write_component_code(const ComponentGroup &comp_group,
dst_sampler.z(tile_0);
dst_sampler.b(tile_bout);
- if(!dst->has_tile())
+ if (!dst->has_tile())
{
auto &tile = writer->declare_tile("dst", TileInfo(to_ckw(_dst->data_type()), m0, n0));
dst->init_virtual_tensor(tile, dst_sampler);
@@ -189,10 +188,10 @@ void GpuCkwDirectConv2d::write_component_code(const ComponentGroup &comp_group,
// We create a 2d container of size (M0, 1) to store the indices for iteration
TileContainer it;
- for(int m = 0; m < m0; ++m)
+ for (int m = 0; m < m0; ++m)
{
- std::vector<std::string> idx { std::to_string(m) };
- it.push_back({ idx });
+ std::vector<std::string> idx{std::to_string(m)};
+ it.push_back({idx});
}
const auto &tile_it = writer->declare_tile("it", it, ckw::DataType::Int32);
@@ -289,9 +288,9 @@ void GpuCkwDirectConv2d::write_component_code(const ComponentGroup &comp_group,
// Bias addition
// NOTE: This operation will be removed from this kernel as the interface is standardized. The intended way of
// performing bias addition is to fuse this convolution kernel with a following elementwise addition kernel.
- if(using_bias)
+ if (using_bias)
{
- if(!bia->has_tile())
+ if (!bia->has_tile())
{
// Reuse the destination sampler for the bias
writer->op_load_once(bia, dst_sampler);
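One more wrapping rule shows up in the src_sampler hunk above: a conditional expression that overflows the limit now breaks before the ':' branch, keeping the two outcomes vertically aligned. A stand-in sketch:

    enum class AddressModeY
    {
        None,
        SkipMinEdgeOnly
    };

    AddressModeY pick_address_mode_y(int kernel_height)
    {
        // Aligned '?'/':' branches once the expression no longer fits:
        return kernel_height == 1 ? AddressModeY::None
                                  : AddressModeY::SkipMinEdgeOnly;
    }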
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp
index c8bf999261..2935ba45ea 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp
@@ -24,22 +24,24 @@
#include "GpuCkwElementwiseBinary.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/StringUtils.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
+#include "arm_compute/core/utils/StringUtils.h"
+#include "arm_compute/core/Validate.h"
#include "ckw/TensorTileSampler.h"
#include "ckw/types/TensorSamplerTypes.h"
+
#include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/ElementwiseBinary.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/ElementwiseBinary.h"
#include "src/dynamic_fusion/sketch/gpu/components/utils/type_printer/ElementwiseBinary.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "support/StringSupport.h"
+
#include <algorithm>
#include <string>
@@ -53,11 +55,7 @@ namespace dynamic_fusion
GpuCkwElementwiseBinary::GpuCkwElementwiseBinary(ComponentId id,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes)
- : IGpuCkwComponentDriver{ id, tensors },
- _lhs{},
- _rhs{},
- _dst{},
- _attributes{ attributes }
+ : IGpuCkwComponentDriver{id, tensors}, _lhs{}, _rhs{}, _dst{}, _attributes{attributes}
{
_lhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_rhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_1);
@@ -65,15 +63,20 @@ GpuCkwElementwiseBinary::GpuCkwElementwiseBinary(ComponentId
ARM_COMPUTE_ERROR_ON_NULLPTR(_lhs, _rhs, _dst);
}
-void GpuCkwElementwiseBinary::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
+void GpuCkwElementwiseBinary::write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const
{
const auto root_window = comp_group.get_root_component()->ckw_component_driver()->get_window();
const auto n0 = static_cast<int32_t>(root_window.x().step());
const auto m0 = static_cast<int32_t>(root_window.y().step());
- GpuCkwComponentArgument *lhs = vtable.declare_variable(comp_group, writer, _lhs, TensorStorageType::ClBufferUint8Ptr, "lhs");
- GpuCkwComponentArgument *rhs = vtable.declare_variable(comp_group, writer, _rhs, TensorStorageType::ClBufferUint8Ptr, "rhs");
- GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
+ GpuCkwComponentArgument *lhs =
+ vtable.declare_variable(comp_group, writer, _lhs, TensorStorageType::ClBufferUint8Ptr, "lhs");
+ GpuCkwComponentArgument *rhs =
+ vtable.declare_variable(comp_group, writer, _rhs, TensorStorageType::ClBufferUint8Ptr, "rhs");
+ GpuCkwComponentArgument *dst =
+ vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
auto &gid_0 = writer->declare_tile("gid_0", ckw::DataType::Int32);
auto &gid_1 = writer->declare_tile("gid_1", ckw::DataType::Int32);
@@ -86,32 +89,36 @@ void GpuCkwElementwiseBinary::write_component_code(const ComponentGroup &comp_gr
auto &const_0 = writer->declare_tile("0", 0);
// Load the LHS and RHS tiles
- if(!lhs->has_tile())
+ if (!lhs->has_tile())
{
- auto sampler = create_boundary_aware_2d_sampler(writer, gid_0, gid_1, _lhs->dimension(0), _lhs->dimension(1), n0, m0, "lhs_", const_0);
+ auto sampler = create_boundary_aware_2d_sampler(writer, gid_0, gid_1, _lhs->dimension(0), _lhs->dimension(1),
+ n0, m0, "lhs_", const_0);
sampler.format(TensorSamplerFormat::C_WH_1); // 3rd dimension collapsed with 2nd dimension
sampler.z(const_0);
sampler.b(gid_2);
writer->op_load_once(lhs, sampler);
}
- if(!rhs->has_tile())
+ if (!rhs->has_tile())
{
- auto sampler = create_boundary_aware_2d_sampler(writer, gid_0, gid_1, _rhs->dimension(0), _rhs->dimension(1), n0, m0, "rhs_", const_0);
+ auto sampler = create_boundary_aware_2d_sampler(writer, gid_0, gid_1, _rhs->dimension(0), _rhs->dimension(1),
+ n0, m0, "rhs_", const_0);
sampler.format(TensorSamplerFormat::C_WH_1); // 3rd dimension collapsed with 2nd dimension
sampler.z(const_0);
sampler.b(gid_2);
writer->op_load_once(rhs, sampler);
}
- auto dst_sampler = create_boundary_aware_2d_sampler(writer, gid_0, gid_1, _dst->dimension(0), _dst->dimension(1), n0, m0, "dst_", const_0);
+ auto dst_sampler = create_boundary_aware_2d_sampler(writer, gid_0, gid_1, _dst->dimension(0), _dst->dimension(1),
+ n0, m0, "dst_", const_0);
dst_sampler.format(TensorSamplerFormat::C_WH_1); // 3rd dimension collapsed with 2nd dimension
dst_sampler.z(const_0);
dst_sampler.b(gid_2);
// Prepare the output tile.
- if(!dst->has_tile())
+ if (!dst->has_tile())
{
- auto &tile = writer->declare_tile("dst_tile", ckw::TileInfo(to_ckw(_dst->data_type()), dst_sampler.height(), dst_sampler.width()));
+ auto &tile = writer->declare_tile(
+ "dst_tile", ckw::TileInfo(to_ckw(_dst->data_type()), dst_sampler.height(), dst_sampler.width()));
dst->init_virtual_tensor(tile, dst_sampler);
}
@@ -131,9 +138,10 @@ Window GpuCkwElementwiseBinary::get_window() const
// Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged
// This is in line with the collapsing convention used by operators like Conv2d
output_shape.collapse(2U, 1U);
- constexpr unsigned int vector_size_byte_opencl = 16;
- const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
- Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
+ constexpr unsigned int vector_size_byte_opencl = 16;
+ const unsigned int num_elems_processed_per_iteration =
+ adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
+ Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
return win;
}
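Editor's note: the step width derived above comes from a 16-byte OpenCL vector budget divided by the element size, then clamped to the innermost dimension. A simplified stand-in for adjust_vec_size(), assuming it only halves the candidate width (the real helper also special-cases small dimensions):

#include <cstdio>

// Hypothetical stand-in for adjust_vec_size(): shrink the candidate
// vector width until it fits the innermost dimension.
unsigned int adjust_vec_size_approx(unsigned int vec_size, unsigned int dim0)
{
    while (vec_size > dim0)
        vec_size /= 2;
    return vec_size == 0 ? 1 : vec_size;
}

int main()
{
    const unsigned int vector_size_byte_opencl = 16;
    const unsigned int element_size            = 4; // e.g. F32
    // Same derivation as get_window(): 16 bytes -> 4 F32 lanes, clamped to dim0 = 6.
    printf("n0 = %u\n", adjust_vec_size_approx(vector_size_byte_opencl / element_size, 6));
    return 0;
}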
@@ -141,11 +149,12 @@ Window GpuCkwElementwiseBinary::get_window() const
std::string GpuCkwElementwiseBinary::get_name(const ComponentGroup &comp_group) const
{
ARM_COMPUTE_UNUSED(comp_group);
- const std::vector<std::string> build_params =
- {
+ const std::vector<std::string> build_params = {
"elementwise_binary",
- "op", to_string(_attributes.operation()),
- "dt", lower_string(string_from_data_type(_dst->data_type())),
+ "op",
+ to_string(_attributes.operation()),
+ "dt",
+ lower_string(string_from_data_type(_dst->data_type())),
};
return join(build_params, "_");
}
@@ -154,13 +163,16 @@ std::string GpuCkwElementwiseBinary::get_tuner_id(const ComponentGroup &comp_gro
{
ARM_COMPUTE_UNUSED(comp_group);
/// NOTE: Hardcoded for now; the parameters should ideally be exported by ckw (a selection of constant tiles)
- std::vector<std::string> build_params =
- {
+ std::vector<std::string> build_params = {
"elementwise_binary",
- "op", to_string(_attributes.operation()),
- "dt", lower_string(string_from_data_type(_dst->data_type())),
- "dst_dim0", support::cpp11::to_string(_dst->dimension(0)),
- "dst_dim1", support::cpp11::to_string(_dst->dimension(1)),
+ "op",
+ to_string(_attributes.operation()),
+ "dt",
+ lower_string(string_from_data_type(_dst->data_type())),
+ "dst_dim0",
+ support::cpp11::to_string(_dst->dimension(0)),
+ "dst_dim1",
+ support::cpp11::to_string(_dst->dimension(1)),
};
return join(build_params, "_");
}
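Editor's note: both get_name() and get_tuner_id() assemble an underscore-joined token list, yielding identifiers such as "elementwise_binary_op_add_dt_f32". A join() in the same spirit (token values illustrative only):

#include <cstddef>
#include <iostream>
#include <string>
#include <vector>

// Separator-join in the spirit of the join() used above.
std::string join_tokens(const std::vector<std::string> &tokens, const std::string &sep)
{
    std::string out;
    for (std::size_t i = 0; i < tokens.size(); ++i)
    {
        if (i != 0)
            out += sep;
        out += tokens[i];
    }
    return out;
}

int main()
{
    // Illustrative token values only.
    std::cout << join_tokens({"elementwise_binary", "op", "add", "dt", "f32"}, "_") << "\n";
    return 0;
}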
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h
index e9c41530f8..1a20d4c533 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h
@@ -46,17 +46,17 @@ public:
* @param[in] tensors Tensor arguments to the component
* @param[in] attributes Component attributes
*/
- GpuCkwElementwiseBinary(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes);
+ GpuCkwElementwiseBinary(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(GpuCkwElementwiseBinary);
/** Destructor */
~GpuCkwElementwiseBinary() override = default;
// Inherited methods overridden:
- virtual void write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const override;
- Window get_window() const override;
- std::string get_name(const ComponentGroup &comp_group) const override;
- std::string get_tuner_id(const ComponentGroup &comp_group) const override;
+ virtual void write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const override;
+ Window get_window() const override;
+ std::string get_name(const ComponentGroup &comp_group) const override;
+ std::string get_tuner_id(const ComponentGroup &comp_group) const override;
private:
const ITensorInfo *_lhs;
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.cpp
index 9c9a298132..8ab3ec3a55 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.cpp
@@ -24,17 +24,18 @@
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
+#include "arm_compute/core/Validate.h"
#include "ckw/TensorTileSampler.h"
+
#include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
using namespace ckw;
@@ -48,11 +49,7 @@ GpuCkwPool2d::GpuCkwPool2d(ComponentId id,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes,
const Settings &settings)
- : IGpuCkwComponentDriver{ id, tensors },
- _src{},
- _dst{},
- _attributes{ attributes },
- _settings{ settings }
+ : IGpuCkwComponentDriver{id, tensors}, _src{}, _dst{}, _attributes{attributes}, _settings{settings}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
@@ -60,14 +57,18 @@ GpuCkwPool2d::GpuCkwPool2d(ComponentId id,
ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
}
-void GpuCkwPool2d::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
+void GpuCkwPool2d::write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const
{
const auto root_window = comp_group.get_root_component()->ckw_component_driver()->get_window();
const unsigned int n0 = root_window.x().step();
const unsigned int m0 = root_window.y().step();
- GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
- GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
+ GpuCkwComponentArgument *src =
+ vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
+ GpuCkwComponentArgument *dst =
+ vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
TileOperand &gid_0 = writer->declare_tile("gid_0", ckw::DataType::Int32);
TileOperand &gid_1 = writer->declare_tile("gid_1", ckw::DataType::Int32);
@@ -90,23 +91,26 @@ void GpuCkwPool2d::write_component_code(const ComponentGroup &comp_group, GpuCkw
const auto src_data_type = _src->data_type();
// Check if this is the global pooling path
- const bool is_global_pooling = (pool_size_x == src_width) && (pool_size_y == src_height) && (pad_x == 0) && (pad_y == 0);
+ const bool is_global_pooling =
+ (pool_size_x == src_width) && (pool_size_y == src_height) && (pad_x == 0) && (pad_y == 0);
// Check if this is a case of FP_MIXED_PRECISION
- const bool use_fp_mixed_precision = (src_data_type == DataType::F16) && _settings.mixed_precision() && _attributes.pool_type() != PoolingType::MAX;
- const auto acc_data_type = (use_fp_mixed_precision) ? (DataType::F32) : (src_data_type);
+ const bool use_fp_mixed_precision =
+ (src_data_type == DataType::F16) && _settings.mixed_precision() && _attributes.pool_type() != PoolingType::MAX;
+ const auto acc_data_type = (use_fp_mixed_precision) ? (DataType::F32) : (src_data_type);
TileOperand &const_0 = writer->declare_tile("0", 0);
const TileOperand &const_1 = writer->declare_tile("1", 1);
const TileOperand &const_lowest_value = writer->declare_tile("LOWEST_VALUE", std::numeric_limits<float>::lowest());
const TileOperand &pool_size_x_tile = writer->declare_tile("POOL_SIZE_X", pool_size_x);
const TileOperand &pool_size_y_tile = writer->declare_tile("POOL_SIZE_Y", pool_size_y);
- const TileOperand &stride_x_tile = writer->declare_tile("STRIDE_X", static_cast<int32_t>(_attributes.stride().x()));
- const TileOperand &stride_y_tile = writer->declare_tile("STRIDE_Y", static_cast<int32_t>(_attributes.stride().y()));
- const TileOperand &pad_x_tile = writer->declare_tile("PAD_X", pad_x);
- const TileOperand &pad_y_tile = writer->declare_tile("PAD_Y", pad_y);
- const TileOperand &dst_height_tile = writer->declare_tile("DST_HEIGHT", static_cast<int32_t>(_dst->dimension(height_idx)));
- const TileOperand &src_height_tile = writer->declare_tile("SRC_HEIGHT", src_height);
- const TileOperand &src_width_tile = writer->declare_tile("SRC_WIDTH", src_width);
+ const TileOperand &stride_x_tile = writer->declare_tile("STRIDE_X", static_cast<int32_t>(_attributes.stride().x()));
+ const TileOperand &stride_y_tile = writer->declare_tile("STRIDE_Y", static_cast<int32_t>(_attributes.stride().y()));
+ const TileOperand &pad_x_tile = writer->declare_tile("PAD_X", pad_x);
+ const TileOperand &pad_y_tile = writer->declare_tile("PAD_Y", pad_y);
+ const TileOperand &dst_height_tile =
+ writer->declare_tile("DST_HEIGHT", static_cast<int32_t>(_dst->dimension(height_idx)));
+ const TileOperand &src_height_tile = writer->declare_tile("SRC_HEIGHT", src_height);
+ const TileOperand &src_width_tile = writer->declare_tile("SRC_WIDTH", src_width);
TileOperand &idx_out_n = writer->declare_tile("idx_out_n", ckw::DataType::Int32);
TileOperand &idx_out_h = writer->declare_tile("idx_out_h", ckw::DataType::Int32);
@@ -145,7 +149,7 @@ void GpuCkwPool2d::write_component_code(const ComponentGroup &comp_group, GpuCkw
// Prepare dst tensor and tile
TileInfo dst_tile_info = TileInfo(to_ckw(src_data_type), m0, n0);
- if(!dst->has_tile())
+ if (!dst->has_tile())
{
TileOperand &dst_tile = writer->declare_tile("dst_tile", dst_tile_info);
dst->init_virtual_tensor(dst_tile, dst_sampler);
@@ -156,14 +160,15 @@ void GpuCkwPool2d::write_component_code(const ComponentGroup &comp_group, GpuCkw
const TileOperand &res_tile = writer->declare_tile("res_tile", TileInfo(to_ckw(acc_data_type), m0, n0));
// Initialise result tile with appropriate value
- if(_attributes.pool_type() == PoolingType::MAX)
+ if (_attributes.pool_type() == PoolingType::MAX)
{
- if(_settings.use_inf_as_limit())
+ if (_settings.use_inf_as_limit())
{
TileContainer minus_inf_tile_container;
std::vector<std::string> value = std::vector<std::string>(n0, "(-INFINITY)");
- minus_inf_tile_container.push_back({ value });
- const TileOperand &minus_inf = writer->declare_tile("minus_inf_const", minus_inf_tile_container, to_ckw(acc_data_type));
+ minus_inf_tile_container.push_back({value});
+ const TileOperand &minus_inf =
+ writer->declare_tile("minus_inf_const", minus_inf_tile_container, to_ckw(acc_data_type));
writer->op_assign(res_tile, minus_inf);
}
else
@@ -209,7 +214,7 @@ void GpuCkwPool2d::write_component_code(const ComponentGroup &comp_group, GpuCkw
writer->op_binary_elementwise_function(pool_y_e, BinaryFunction::Min, pool_size_y_tile, pool_y_e);
const TileOperand &filter_size = writer->declare_tile("filter_size", ckw::DataType::Int32);
- if(_attributes.exclude_padding())
+ if (_attributes.exclude_padding())
{
const TileOperand &y_diff = writer->declare_tile("y_diff", ckw::DataType::Int32);
const TileOperand &x_diff = writer->declare_tile("x_diff", ckw::DataType::Int32);
@@ -227,7 +232,7 @@ void GpuCkwPool2d::write_component_code(const ComponentGroup &comp_group, GpuCkw
const TileOperand &x = writer->declare_tile("x", ckw::DataType::Int32);
const TileOperand &y = writer->declare_tile("y", ckw::DataType::Int32);
- if(is_global_pooling)
+ if (is_global_pooling)
{
writer->op_assign(x, const_0);
writer->op_assign(y, const_0);
@@ -242,76 +247,80 @@ void GpuCkwPool2d::write_component_code(const ComponentGroup &comp_group, GpuCkw
}
// Y dim for-loop
- writer->op_for_loop(y, BinaryOp::Less, pool_y_e, y, AssignmentOp::Increment, const_1, [&]()
- {
- // Reset the iterator for the inner loop
- if(is_global_pooling)
- {
- writer->op_assign(x, const_0);
- }
- else
+ writer->op_for_loop(
+ y, BinaryOp::Less, pool_y_e, y, AssignmentOp::Increment, const_1,
+ [&]()
{
- writer->op_assign(x, pool_x_s);
- }
-
- TileOperand &a_y = writer->declare_tile("a_y", ckw::DataType::Int32);
- writer->op_binary_expression(a_y, idx_in_h, BinaryOp::Add, y);
-
- // X dim for-loop
- writer->op_for_loop(x, BinaryOp::Less, pool_x_e, x, AssignmentOp::Increment, const_1, [&]()
- {
- TileOperand &a_x = writer->declare_tile("a_x", ckw::DataType::Int32);
- writer->op_binary_expression(a_x, idx_in_w, BinaryOp::Add, x);
-
- TileOperand &src_tile = writer->declare_tile("src_tile", TileInfo(to_ckw(acc_data_type), m0, n0));
-
- src_sampler.y(a_x);
- src_sampler.z(a_y);
-
- // Load src tile
- if(use_fp_mixed_precision)
+ // Reset the iterator for the inner loop
+ if (is_global_pooling)
{
- TileOperand &src_uncasted_tile = writer->declare_tile("uncasted_src_tile", dst_tile_info);
- writer->op_load(src_uncasted_tile, src->tensor(), src_sampler);
- writer->op_cast_expression(src_tile, src_uncasted_tile, ckw::ConvertPolicy::None);
+ writer->op_assign(x, const_0);
}
else
{
- writer->op_load(src_tile, src->tensor(), src_sampler);
+ writer->op_assign(x, pool_x_s);
}
- // Take the square of the input, for L2 Pooling
- if(_attributes.pool_type() == PoolingType::L2)
- {
- writer->op_binary_expression(src_tile, src_tile, BinaryOp::Mul, src_tile);
- }
-
- // Perfom Pooling op
- if(_attributes.pool_type() == PoolingType::MAX)
- {
- writer->op_binary_elementwise_function(res_tile, BinaryFunction::Max, res_tile, src_tile);
- }
- else
- {
- writer->op_binary_expression(res_tile, res_tile, BinaryOp::Add, src_tile);
- }
+ TileOperand &a_y = writer->declare_tile("a_y", ckw::DataType::Int32);
+ writer->op_binary_expression(a_y, idx_in_h, BinaryOp::Add, y);
+
+ // X dim for-loop
+ writer->op_for_loop(
+ x, BinaryOp::Less, pool_x_e, x, AssignmentOp::Increment, const_1,
+ [&]()
+ {
+ TileOperand &a_x = writer->declare_tile("a_x", ckw::DataType::Int32);
+ writer->op_binary_expression(a_x, idx_in_w, BinaryOp::Add, x);
+
+ TileOperand &src_tile = writer->declare_tile("src_tile", TileInfo(to_ckw(acc_data_type), m0, n0));
+
+ src_sampler.y(a_x);
+ src_sampler.z(a_y);
+
+ // Load src tile
+ if (use_fp_mixed_precision)
+ {
+ TileOperand &src_uncasted_tile = writer->declare_tile("uncasted_src_tile", dst_tile_info);
+ writer->op_load(src_uncasted_tile, src->tensor(), src_sampler);
+ writer->op_cast_expression(src_tile, src_uncasted_tile, ckw::ConvertPolicy::None);
+ }
+ else
+ {
+ writer->op_load(src_tile, src->tensor(), src_sampler);
+ }
+
+ // Take the square of the input, for L2 Pooling
+ if (_attributes.pool_type() == PoolingType::L2)
+ {
+ writer->op_binary_expression(src_tile, src_tile, BinaryOp::Mul, src_tile);
+ }
+
+ // Perform Pooling op
+ if (_attributes.pool_type() == PoolingType::MAX)
+ {
+ writer->op_binary_elementwise_function(res_tile, BinaryFunction::Max, res_tile, src_tile);
+ }
+ else
+ {
+ writer->op_binary_expression(res_tile, res_tile, BinaryOp::Add, src_tile);
+ }
+ });
});
- });
- if((_attributes.pool_type() == PoolingType::AVG) || (_attributes.pool_type() == PoolingType::L2))
+ if ((_attributes.pool_type() == PoolingType::AVG) || (_attributes.pool_type() == PoolingType::L2))
{
// filter_size is automatically broadcasted in the operation
writer->op_binary_expression(res_tile, res_tile, BinaryOp::Div, filter_size);
}
// Take square root of the result in L2 pooling
- if(_attributes.pool_type() == PoolingType::L2)
+ if (_attributes.pool_type() == PoolingType::L2)
{
writer->op_unary_elementwise_function(res_tile, UnaryFunction::Sqrt, res_tile);
}
// Store the results and do casting if FP_MIXED_PRECISION
- if(use_fp_mixed_precision)
+ if (use_fp_mixed_precision)
{
writer->op_cast_expression(dst_tile, res_tile, ckw::ConvertPolicy::None);
}
@@ -326,7 +335,7 @@ Window GpuCkwPool2d::get_window() const
ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
TensorShape output_shape = _dst->tensor_shape();
- const unsigned int vec_size = adjust_vec_size(((_dst->data_type() == DataType::F32) ? 2 : 4), _dst->dimension(0));
+ const unsigned int vec_size = adjust_vec_size(((_dst->data_type() == DataType::F32) ? 2 : 4), _dst->dimension(0));
// Create and configure kernel window
auto win = calculate_max_window(output_shape, Steps(vec_size));
win = win.collapse_if_possible(win, Window::DimZ); // collapse window on batch size.
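Editor's note: a sketch of the windowing arithmetic above, assuming 2 lanes for F32 and 4 otherwise, with the iteration space rounded up to whole steps, which is effectively what calculate_max_window() does per dimension:

#include <cstdio>

// Round value up to a whole number of steps.
unsigned int ceil_to_multiple(unsigned int value, unsigned int step)
{
    return ((value + step - 1) / step) * step;
}

int main()
{
    const bool         is_f32   = true;
    const unsigned int vec_size = is_f32 ? 2 : 4;
    const unsigned int dim0     = 7;
    printf("padded iteration space: %u (step %u)\n", ceil_to_multiple(dim0, vec_size), vec_size);
    return 0;
}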
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.h
index 2ccf255236..822282a108 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.h
@@ -59,9 +59,11 @@ public:
/** Destructor */
~GpuCkwPool2d() override = default;
// Inherited methods overridden:
- virtual void write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const override;
- Window get_window() const override;
- std::string get_name(const ComponentGroup &comp_group) const override;
+ virtual void write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const override;
+ Window get_window() const override;
+ std::string get_name(const ComponentGroup &comp_group) const override;
private:
const ITensorInfo *_src;
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwResize.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwResize.cpp
index d997c82dae..f2a7d41afd 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwResize.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwResize.cpp
@@ -28,14 +28,13 @@
#include "src/core/helpers/WindowHelpers.h"
#include "src/core/utils/ScaleUtils.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
-
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "support/StringSupport.h"
namespace arm_compute
@@ -49,20 +48,17 @@ namespace
constexpr unsigned int opencl_vector_size_in_bytes = 16;
} // namespace
-GpuCkwResize::GpuCkwResize(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
- : IGpuCkwComponentDriver{ id, tensors },
- _src{},
- _dst{},
- _attributes{ attributes }
+GpuCkwResize::GpuCkwResize(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes)
+ : IGpuCkwComponentDriver{id, tensors}, _src{}, _dst{}, _attributes{attributes}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC);
_dst = this->tensors().get_const_tensor(TensorType::ACL_DST);
ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
}
-void GpuCkwResize::do_nearest_neighbor_resize(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
+void GpuCkwResize::do_nearest_neighbor_resize(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const
{
const size_t width_idx = get_data_layout_dimension_index(_dst->data_layout(), DataLayoutDimension::WIDTH);
const size_t height_idx = get_data_layout_dimension_index(_dst->data_layout(), DataLayoutDimension::HEIGHT);
@@ -72,12 +68,16 @@ void GpuCkwResize::do_nearest_neighbor_resize(const ComponentGroup &comp_group,
const int32_t m0 = root_window.y().step();
const int32_t partial_n0 = _dst->dimension(0) % n0;
- GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
- GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
+ GpuCkwComponentArgument *src =
+ vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
+ GpuCkwComponentArgument *dst =
+ vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
// Constants
- const float scale_x = scale_utils::calculate_resize_ratio(_src->dimension(width_idx), _dst->dimension(width_idx), _attributes.align_corners());
- const float scale_y = scale_utils::calculate_resize_ratio(_src->dimension(height_idx), _dst->dimension(height_idx), _attributes.align_corners());
+ const float scale_x = scale_utils::calculate_resize_ratio(_src->dimension(width_idx), _dst->dimension(width_idx),
+ _attributes.align_corners());
+ const float scale_y = scale_utils::calculate_resize_ratio(_src->dimension(height_idx), _dst->dimension(height_idx),
+ _attributes.align_corners());
const auto &tile_scale_x = writer->declare_tile("scale_x", scale_x);
const auto &tile_scale_y = writer->declare_tile("scale_y", scale_y);
const auto &tile_0 = writer->declare_tile("0", 0);
@@ -112,7 +112,7 @@ void GpuCkwResize::do_nearest_neighbor_resize(const ComponentGroup &comp_group,
const auto &tile_xi_f = writer->declare_tile("xi_f", ckw::DataType::Fp32);
const auto &tile_yi_f = writer->declare_tile("yi_f", ckw::DataType::Fp32);
- switch(_attributes.sampling_policy())
+ switch (_attributes.sampling_policy())
{
case SamplingPolicy::TOP_LEFT:
// xi_f = (xo * scale_x)
@@ -138,7 +138,7 @@ void GpuCkwResize::do_nearest_neighbor_resize(const ComponentGroup &comp_group,
ARM_COMPUTE_ERROR("Unsupported sampling policy");
}
- if(_attributes.align_corners())
+ if (_attributes.align_corners())
{
writer->op_unary_elementwise_function(tile_xi_f, UnaryFunction::Round, tile_xi_f);
writer->op_unary_elementwise_function(tile_yi_f, UnaryFunction::Round, tile_yi_f);
@@ -161,8 +161,10 @@ void GpuCkwResize::do_nearest_neighbor_resize(const ComponentGroup &comp_group,
auto &tile_xi0 = writer->declare_tile("xi0", ckw::DataType::Int32);
auto &tile_yi0 = writer->declare_tile("yi0", ckw::DataType::Int32);
- writer->op_ternary_elementwise_function(tile_xi0, TernaryFunction::Clamp, tile_xi_f_int, tile_0, tile_src_w_minus_1);
- writer->op_ternary_elementwise_function(tile_yi0, TernaryFunction::Clamp, tile_yi_f_int, tile_0, tile_src_h_minus_1);
+ writer->op_ternary_elementwise_function(tile_xi0, TernaryFunction::Clamp, tile_xi_f_int, tile_0,
+ tile_src_w_minus_1);
+ writer->op_ternary_elementwise_function(tile_yi0, TernaryFunction::Clamp, tile_yi_f_int, tile_0,
+ tile_src_h_minus_1);
TensorTileSampler src_sampler;
src_sampler.x(tile_co);
@@ -199,7 +201,9 @@ void GpuCkwResize::do_nearest_neighbor_resize(const ComponentGroup &comp_group,
writer->op_assign(tile_dst, tile_src);
}
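Editor's note: the scale constants above come from scale_utils::calculate_resize_ratio(). A hedged restatement of the usual formula, assuming the align_corners convention of subtracting one sample from both extents so that corner samples map exactly:

#include <cstddef>
#include <cstdio>

// Assumed formula; treat as an approximation of the library helper.
float resize_ratio(std::size_t in_size, std::size_t out_size, bool align_corners)
{
    const std::size_t offset = align_corners ? 1 : 0;
    return static_cast<float>(in_size - offset) / static_cast<float>(out_size - offset);
}

int main()
{
    printf("scale_x (default)       = %f\n", resize_ratio(10, 20, false)); // 0.500000
    printf("scale_x (align_corners) = %f\n", resize_ratio(10, 20, true));  // 9/19
    return 0;
}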
-void GpuCkwResize::do_bilinear_resize(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
+void GpuCkwResize::do_bilinear_resize(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const
{
const size_t width_idx = get_data_layout_dimension_index(_dst->data_layout(), DataLayoutDimension::WIDTH);
const size_t height_idx = get_data_layout_dimension_index(_dst->data_layout(), DataLayoutDimension::HEIGHT);
@@ -209,12 +213,16 @@ void GpuCkwResize::do_bilinear_resize(const ComponentGroup &comp_group, GpuCkwVa
const int32_t m0 = root_window.y().step();
const int32_t partial_n0 = _dst->dimension(0) % n0;
- GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
- GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
+ GpuCkwComponentArgument *src =
+ vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
+ GpuCkwComponentArgument *dst =
+ vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
// Constants
- const float scale_x = scale_utils::calculate_resize_ratio(_src->dimension(width_idx), _dst->dimension(width_idx), _attributes.align_corners());
- const float scale_y = scale_utils::calculate_resize_ratio(_src->dimension(height_idx), _dst->dimension(height_idx), _attributes.align_corners());
+ const float scale_x = scale_utils::calculate_resize_ratio(_src->dimension(width_idx), _dst->dimension(width_idx),
+ _attributes.align_corners());
+ const float scale_y = scale_utils::calculate_resize_ratio(_src->dimension(height_idx), _dst->dimension(height_idx),
+ _attributes.align_corners());
const auto &tile_scale_x = writer->declare_tile("scale_x", scale_x);
const auto &tile_scale_y = writer->declare_tile("scale_y", scale_y);
const auto &tile_0 = writer->declare_tile("0", 0);
@@ -251,7 +259,7 @@ void GpuCkwResize::do_bilinear_resize(const ComponentGroup &comp_group, GpuCkwVa
const auto &tile_xi_f = writer->declare_tile("xi_f", ckw::DataType::Fp32);
const auto &tile_yi_f = writer->declare_tile("yi_f", ckw::DataType::Fp32);
- switch(_attributes.sampling_policy())
+ switch (_attributes.sampling_policy())
{
case SamplingPolicy::TOP_LEFT:
// xi_f = (xo * scale_x)
@@ -312,8 +320,10 @@ void GpuCkwResize::do_bilinear_resize(const ComponentGroup &comp_group, GpuCkwVa
writer->op_ternary_elementwise_function(tile_xi0, TernaryFunction::Clamp, tile_xi, tile_0, tile_src_w_minus_1);
writer->op_ternary_elementwise_function(tile_yi0, TernaryFunction::Clamp, tile_yi, tile_0, tile_src_h_minus_1);
- writer->op_ternary_elementwise_function(tile_xi1, TernaryFunction::Clamp, tile_xi_plus_1, tile_0, tile_src_w_minus_1);
- writer->op_ternary_elementwise_function(tile_yi1, TernaryFunction::Clamp, tile_yi_plus_1, tile_0, tile_src_h_minus_1);
+ writer->op_ternary_elementwise_function(tile_xi1, TernaryFunction::Clamp, tile_xi_plus_1, tile_0,
+ tile_src_w_minus_1);
+ writer->op_ternary_elementwise_function(tile_yi1, TernaryFunction::Clamp, tile_yi_plus_1, tile_0,
+ tile_src_h_minus_1);
TensorTileSampler in_sampler;
in_sampler.x(tile_co);
@@ -388,7 +398,7 @@ void GpuCkwResize::do_bilinear_resize(const ComponentGroup &comp_group, GpuCkwVa
writer->op_binary_expression(tile_a1, tile_yi_f, BinaryOp::Sub, tile_yi_float);
writer->op_binary_expression(tile_b1, tile_1, BinaryOp::Sub, tile_a1);
- if(is_data_type_float(_src->data_type()))
+ if (is_data_type_float(_src->data_type()))
{
// Cast weights to source type
const auto &tile_a_src_type = writer->declare_tile("a_src_t", to_ckw(_src->data_type()));
@@ -461,9 +471,11 @@ void GpuCkwResize::do_bilinear_resize(const ComponentGroup &comp_group, GpuCkwVa
}
}
-void GpuCkwResize::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
+void GpuCkwResize::write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const
{
- switch(_attributes.interpolation_policy())
+ switch (_attributes.interpolation_policy())
{
case InterpolationPolicy::NEAREST_NEIGHBOR:
do_nearest_neighbor_resize(comp_group, vtable, writer);
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp
index 8917391537..889706b0c0 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp
@@ -24,10 +24,12 @@
#include "GpuCkwStore.h"
#include "arm_compute/core/Error.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+
#include <string>
namespace arm_compute
@@ -37,12 +39,14 @@ namespace experimental
namespace dynamic_fusion
{
GpuCkwStore::GpuCkwStore(ComponentId id, const ArgumentPack<ITensorInfo> &tensors)
- : IGpuCkwComponentDriver{ id, tensors }, _src{}, _dst{}
+ : IGpuCkwComponentDriver{id, tensors}, _src{}, _dst{}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
}
-void GpuCkwStore::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
+void GpuCkwStore::write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const
{
auto src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
auto dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h
index 8e35651caf..f1f0e6747b 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h
@@ -48,8 +48,10 @@ public:
/** Destructor */
~GpuCkwStore() override = default;
// Inherited methods overridden:
- virtual void write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const override;
- std::string get_name(const ComponentGroup &comp_group) const override;
+ virtual void write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const override;
+ std::string get_name(const ComponentGroup &comp_group) const override;
private:
const ITensorInfo *_src;
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h
index e2b8584b99..6ba2b2f651 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h
@@ -26,6 +26,7 @@
#include "arm_compute/core/utils/misc/Utility.h"
#include "ckw/TensorTileSampler.h"
+
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
@@ -44,9 +45,14 @@ using SamplerCreator = std::function<TensorTileSampler(GpuCkwScopedKernelWriter
/** Load src and dst tiles of dimension [m0, n0] only when not loaded and prepare the sampler
*/
-inline void load_src_dst_tiles_and_prepare_sampler(GpuCkwScopedKernelWriter &writer, GpuCkwComponentArgument *src, GpuCkwComponentArgument *dst, int32_t m0, int32_t n0, SamplerCreator create_sampler)
+inline void load_src_dst_tiles_and_prepare_sampler(GpuCkwScopedKernelWriter &writer,
+ GpuCkwComponentArgument *src,
+ GpuCkwComponentArgument *dst,
+ int32_t m0,
+ int32_t n0,
+ SamplerCreator create_sampler)
{
- if(!src->has_tile())
+ if (!src->has_tile())
{
const auto sampler = create_sampler(writer, m0, n0);
writer->op_load_once(src, sampler);
@@ -61,7 +67,7 @@ inline void load_src_dst_tiles_and_prepare_sampler(GpuCkwScopedKernelWriter &wri
const auto &sampler = src->tile_sampler();
// Prepare the output tile.
- if(!dst->has_tile())
+ if (!dst->has_tile())
{
auto &tile = writer->declare_tile("dst_tile", src_tile.tile_info());
dst->init_virtual_tensor(tile, sampler);
@@ -78,7 +84,13 @@ inline void load_src_dst_tiles_and_prepare_sampler(GpuCkwScopedKernelWriter &wri
* @param[in] prefix Prefix to all the tiles declared within this function
* @param[in] const_0 Constant tile of value 0
*/
-inline void get_coord(GpuCkwScopedKernelWriter writer, TileOperand &coord, const TileOperand &gid, int32_t step_v, int32_t leftover_step_v, const std::string &prefix, const TileOperand &const_0)
+inline void get_coord(GpuCkwScopedKernelWriter writer,
+ TileOperand &coord,
+ const TileOperand &gid,
+ int32_t step_v,
+ int32_t leftover_step_v,
+ const std::string &prefix,
+ const TileOperand &const_0)
{
auto &step = writer->declare_tile(prefix + "step", step_v);
auto &leftover_step = writer->declare_tile(prefix + "leftover_step", leftover_step_v);
@@ -122,8 +134,15 @@ inline void get_coord(GpuCkwScopedKernelWriter writer, TileOperand &coord, const
*
* @return TensorTileSampler
*/
-inline TensorTileSampler create_boundary_aware_2d_sampler(GpuCkwScopedKernelWriter writer, TileOperand &gid_0, TileOperand &gid_1, int32_t dim0_v, int32_t dim1_v, int32_t n0_v, int32_t m0_v,
- const std::string prefix, TileOperand &const_0)
+inline TensorTileSampler create_boundary_aware_2d_sampler(GpuCkwScopedKernelWriter writer,
+ TileOperand &gid_0,
+ TileOperand &gid_1,
+ int32_t dim0_v,
+ int32_t dim1_v,
+ int32_t n0_v,
+ int32_t m0_v,
+ const std::string prefix,
+ TileOperand &const_0)
{
// Clamp tile size [n0, m0] against dimension [dim0, dim1]
// This is needed to:
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h
index 34b1283add..5da317bf38 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h
@@ -28,6 +28,7 @@
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "ckw/TensorInfo.h"
+
#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
namespace arm_compute
@@ -38,7 +39,7 @@ namespace dynamic_fusion
{
inline ckw::DataType to_ckw(DataType dt)
{
- switch(dt)
+ switch (dt)
{
case DataType::F32:
return ckw::DataType::Fp32;
@@ -65,21 +66,16 @@ inline ckw::DataType to_ckw(DataType dt)
inline ckw::TensorShape to_ckw(const TensorShape &shape)
{
- ARM_COMPUTE_ERROR_ON(shape.num_max_dimensions < std::tuple_size<ckw::TensorShape> {});
- ARM_COMPUTE_ERROR_ON(std::tuple_size<ckw::TensorShape> {} != 5);
+ ARM_COMPUTE_ERROR_ON(shape.num_max_dimensions < std::tuple_size<ckw::TensorShape>{});
+ ARM_COMPUTE_ERROR_ON(std::tuple_size<ckw::TensorShape>{} != 5);
/// NOTE: Overflow danger. Use size_t?
- return ckw::TensorShape
- {
- static_cast<int32_t>(shape[0]),
- static_cast<int32_t>(shape[1]),
- static_cast<int32_t>(shape[2]),
- static_cast<int32_t>(shape[3]),
- static_cast<int32_t>(shape[4])
- };
+ return ckw::TensorShape{static_cast<int32_t>(shape[0]), static_cast<int32_t>(shape[1]),
+ static_cast<int32_t>(shape[2]), static_cast<int32_t>(shape[3]),
+ static_cast<int32_t>(shape[4])};
}
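Editor's note: the NOTE above flags a narrowing hazard in the size_t-to-int32_t casts. A defensive variant (hypothetical, not library code) would reject dimensions that cannot be represented instead of silently wrapping:

#include <cstddef>
#include <cstdint>
#include <limits>
#include <stdexcept>

// Hypothetical checked narrowing; not part of the library.
int32_t checked_narrow(std::size_t dim)
{
    if (dim > static_cast<std::size_t>(std::numeric_limits<int32_t>::max()))
        throw std::overflow_error("tensor dimension does not fit in int32_t");
    return static_cast<int32_t>(dim);
}

int main()
{
    return checked_narrow(1024) == 1024 ? 0 : 1;
}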
inline ckw::TensorDataLayout to_ckw(DataLayout dl)
{
- switch(dl)
+ switch (dl)
{
case DataLayout::NHWC:
return ckw::TensorDataLayout::Nhwc;
@@ -91,18 +87,13 @@ inline ckw::TensorDataLayout to_ckw(DataLayout dl)
}
inline ckw::TensorInfo to_ckw(const ITensorInfo &tensor_info)
{
- return ckw::TensorInfo
- {
- to_ckw(tensor_info.data_type()),
- to_ckw(tensor_info.tensor_shape()),
- to_ckw(tensor_info.data_layout()),
- tensor_info.id()
- };
+ return ckw::TensorInfo{to_ckw(tensor_info.data_type()), to_ckw(tensor_info.tensor_shape()),
+ to_ckw(tensor_info.data_layout()), tensor_info.id()};
}
inline TensorComponentType from_ckw(const ckw::TensorComponentType &component)
{
- switch(component)
+ switch (component)
{
case ckw::TensorComponentType::OffsetFirstElement:
return TensorComponentType::OffsetFirstElement;
@@ -142,7 +133,7 @@ inline TensorComponentType from_ckw(const ckw::TensorComponentType &component)
inline ckw::TensorStorageType to_ckw(const TensorStorageType &storage)
{
- switch(storage)
+ switch (storage)
{
case TensorStorageType::ClBufferUint8Ptr:
return ckw::TensorStorageType::BufferUint8Ptr;
@@ -159,7 +150,7 @@ inline ckw::TensorStorageType to_ckw(const TensorStorageType &storage)
}
inline TensorStorageType from_ckw(const ckw::TensorStorageType &storage)
{
- switch(storage)
+ switch (storage)
{
case ckw::TensorStorageType::BufferUint8Ptr:
return TensorStorageType::ClBufferUint8Ptr;
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/ElementwiseBinary.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/ElementwiseBinary.h
index 9cb022fc10..0cba258940 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/ElementwiseBinary.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/ElementwiseBinary.h
@@ -25,6 +25,7 @@
#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_UTILS_TYPE_CONVERTER_ELEMENTWISEBINARY
#include "ckw/types/Operators.h"
+
#include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h"
namespace arm_compute
@@ -35,7 +36,7 @@ namespace dynamic_fusion
{
inline ckw::BinaryOp to_ckw(const ElementwiseBinaryCommonAttributes &attributes)
{
- switch(attributes.operation())
+ switch (attributes.operation())
{
case ElementwiseBinaryCommonAttributes::ElementwiseOp::Add:
return ckw::BinaryOp::Add;
diff --git a/src/dynamic_fusion/sketch/gpu/components/GpuKernelComponentFactory.h b/src/dynamic_fusion/sketch/gpu/components/GpuKernelComponentFactory.h
index f7f0029618..ee109a7e2b 100644
--- a/src/dynamic_fusion/sketch/gpu/components/GpuKernelComponentFactory.h
+++ b/src/dynamic_fusion/sketch/gpu/components/GpuKernelComponentFactory.h
@@ -24,8 +24,9 @@
#ifndef SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_GPUKERNELCOMPONENTFACTORY
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_GPUKERNELCOMPONENTFACTORY
-#include "Types.h"
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+
+#include "Types.h"
#include <memory>
namespace arm_compute
@@ -49,13 +50,13 @@ public:
* @return std::unique_ptr<IGpuKernelComponent>
*/
template <typename T, typename... Args>
- std::unique_ptr<IGpuKernelComponent> create(Args &&... args)
+ std::unique_ptr<IGpuKernelComponent> create(Args &&...args)
{
return std::make_unique<T>(_count++, std::forward<Args>(args)...);
}
private:
- ComponentId _count{ 0 };
+ ComponentId _count{0};
};
} // namespace dynamic_fusion
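Editor's note: the factory's create() is a textbook variadic perfect-forwarding pattern: a monotonically increasing ComponentId is prepended and the remaining arguments are forwarded to the component constructor. A minimal model (placeholder types):

#include <iostream>
#include <memory>
#include <utility>

struct Component
{
    Component(int id, const char *name) : id(id), name(name) {}
    int         id;
    const char *name;
};

// Minimal model: prepend a running id, forward everything else.
class Factory
{
public:
    template <typename T, typename... Args>
    std::unique_ptr<T> create(Args &&...args)
    {
        return std::make_unique<T>(_count++, std::forward<Args>(args)...);
    }
private:
    int _count{0};
};

int main()
{
    Factory f;
    auto store  = f.create<Component>("store");
    auto resize = f.create<Component>("resize");
    std::cout << store->id << " " << resize->id << "\n"; // 0 1
    return 0;
}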
diff --git a/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h b/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h
index af766a7ece..4b8eea2f57 100644
--- a/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h
+++ b/src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h
@@ -24,11 +24,11 @@
#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_IGPUKERNELCOMPONENT
#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_IGPUKERNELCOMPONENT
-#include "Types.h"
-
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h"
+#include "Types.h"
+
namespace arm_compute
{
namespace experimental
@@ -76,13 +76,8 @@ public:
* @param[in] properties Kernel component properties
* @param[in] tensors Tensor arguments to the components
*/
- IGpuKernelComponent(
- ComponentId id,
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors)
- : _id{ id },
- _properties{ properties },
- _tensors{ tensors }
+ IGpuKernelComponent(ComponentId id, const Properties &properties, const ArgumentPack<ITensorInfo> &tensors)
+ : _id{id}, _properties{properties}, _tensors{tensors}
{
}
/** Destructor */
@@ -117,7 +112,7 @@ public:
virtual GpuComponentType type() const = 0;
private:
- ComponentId _id{ -1 };
+ ComponentId _id{-1};
Properties _properties{};
ArgumentPack<ITensorInfo> _tensors{};
};
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp
index c41257d18c..fdf528a65d 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.cpp
@@ -68,17 +68,11 @@ ClComponentActivation::ClComponentActivation(ComponentId
const IGpuKernelComponent::Properties &properties,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes)
- : IGpuKernelComponent{ id, properties, tensors },
+ : IGpuKernelComponent{id, properties, tensors},
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer
-{
- std::make_unique<ClTemplateActivation>(id, tensors, attributes)
-}
+ _component_writer{std::make_unique<ClTemplateActivation>(id, tensors, attributes)}
#else //ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer
-{
- std::make_unique<GpuCkwActivation>(id, tensors, attributes)
-}
+ _component_writer{std::make_unique<GpuCkwActivation>(id, tensors, attributes)}
#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
}
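Editor's note: the constructor above selects one of two writer implementations at compile time inside the mem-initializer list. The same pattern in isolation (placeholder names, not the library's classes or macro):

#include <memory>

struct IWriter { virtual ~IWriter() = default; };
struct TemplateWriter : IWriter {};
struct CkwWriter : IWriter {};

struct ComponentSketch
{
    ComponentSketch()
#ifndef USE_CKW_WRITER // stand-in for ACL_INTERNAL_TEST_CKW_IN_DF
        : _writer{std::make_unique<TemplateWriter>()}
#else
        : _writer{std::make_unique<CkwWriter>()}
#endif
    {
    }
    std::unique_ptr<IWriter> _writer;
};

int main()
{
    ComponentSketch c;
    return c._writer != nullptr ? 0 : 1;
}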
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h
index 9b090af988..02c854356a 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h
@@ -25,9 +25,8 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTACTIVATION
#include "arm_compute/function_info/ActivationLayerInfo.h"
-#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
-#include "arm_compute/function_info/ActivationLayerInfo.h"
+#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
namespace arm_compute
{
@@ -79,20 +78,17 @@ public:
* |F16 |F16 |
* |F32 |F32 |
*/
- static Status validate(
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes);
+ static Status
+ validate(const Properties &properties, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
/** Constructor
*
* Similar to @ref ClComponentActivation::validate()
*/
- ClComponentActivation(
- ComponentId id,
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes);
+ ClComponentActivation(ComponentId id,
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes);
/** Destructor */
~ClComponentActivation() override;
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp
index 635869f817..b1636795a3 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.cpp
@@ -24,6 +24,7 @@
#include "ClComponentCast.h"
#include "arm_compute/core/Error.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
@@ -38,11 +39,10 @@ namespace experimental
{
namespace dynamic_fusion
{
-Status ClComponentCast::validate(
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings)
+Status ClComponentCast::validate(const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings)
{
ARM_COMPUTE_UNUSED(properties, attributes, settings);
@@ -53,13 +53,15 @@ Status ClComponentCast::validate(
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(dst);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
ARM_COMPUTE_RETURN_ERROR_ON(src == dst);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->data_type() == attributes.data_type(), "input and target data types should be different");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->data_type() == attributes.data_type(),
+ "input and target data types should be different");
// Validate in case of configured dst
- if(dst->total_size() > 0)
+ if (dst->total_size() > 0)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_SHAPES(src, dst);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(dst->data_type() != attributes.data_type(), "dst and target data types should be same");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(dst->data_type() != attributes.data_type(),
+ "dst and target data types should be same");
}
return Status{};
@@ -69,17 +71,11 @@ ClComponentCast::ClComponentCast(ComponentId id,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes,
const Settings &settings)
- : IGpuKernelComponent{ id, properties, tensors },
+ : IGpuKernelComponent{id, properties, tensors},
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer
-{
- std::make_unique<ClTemplateCast>(id, tensors, attributes)
-}
+ _component_writer{std::make_unique<ClTemplateCast>(id, tensors, attributes)}
#else //ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer
-{
- std::make_unique<GpuCkwCast>(id, tensors, attributes)
-}
+ _component_writer{std::make_unique<GpuCkwCast>(id, tensors, attributes)}
#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
ARM_COMPUTE_UNUSED(attributes, settings);
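Editor's note: ClComponentCast::validate() enforces two type rules above: the source and target data types must differ, and an already-configured dst must match the target type. A hedged restatement with a plain enum standing in for DataType:

#include <cassert>

enum class DT { F16, F32, S32 };

// Returns true when a cast request passes both checks above.
bool cast_types_ok(DT src, DT target, bool dst_configured, DT dst)
{
    if (src == target) // "input and target data types should be different"
        return false;
    if (dst_configured && dst != target) // "dst and target data types should be same"
        return false;
    return true;
}

int main()
{
    assert(cast_types_ok(DT::F32, DT::F16, true, DT::F16));
    assert(!cast_types_ok(DT::F32, DT::F32, false, DT::F32));
    return 0;
}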
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h
index 37b8cbb6c9..ed77b1203b 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h
@@ -25,6 +25,7 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTCAST
#include "arm_compute/dynamic_fusion/sketch/attributes/CastAttributes.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
namespace arm_compute
@@ -93,11 +94,10 @@ public:
* |F16 | U8, S8, U16, S16, U32, S32, F32 |
* |F32 | U8, S8, U16, S16, U32, S32, F16 |
*/
- static Status validate(
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings);
+ static Status validate(const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings);
/** Constructor
*
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp
index 5626093079..d95e0be1f2 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.cpp
@@ -26,6 +26,7 @@
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h"
@@ -103,11 +104,10 @@ unsigned int Settings::m0() const
return _m0;
}
-Status ClComponentDepthwiseConv2d::validate(
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings)
+Status ClComponentDepthwiseConv2d::validate(const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings)
{
ARM_COMPUTE_UNUSED(properties, settings);
const auto src = tensors.get_const_tensor(TensorType::ACL_SRC_0);
@@ -121,7 +121,7 @@ Status ClComponentDepthwiseConv2d::validate(
// Matching data type
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, wei);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
- if(bia != nullptr)
+ if (bia != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, bia);
}
@@ -129,7 +129,7 @@ Status ClComponentDepthwiseConv2d::validate(
// Matching data layout
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, wei);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, dst);
- if(bia != nullptr)
+ if (bia != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, bia);
}
@@ -138,7 +138,7 @@ Status ClComponentDepthwiseConv2d::validate(
ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().total_size() == 0);
ARM_COMPUTE_RETURN_ERROR_ON(wei->tensor_shape().total_size() == 0);
ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0);
- if(bia != nullptr)
+ if (bia != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON(bia->tensor_shape().total_size() == 0);
}
@@ -148,16 +148,17 @@ Status ClComponentDepthwiseConv2d::validate(
const DataLayout data_layout = src->data_layout();
const size_t channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
- ARM_COMPUTE_RETURN_ERROR_ON(wei->dimension(channel_idx) != (src->dimension(channel_idx) * attributes.depth_multiplier()));
+ ARM_COMPUTE_RETURN_ERROR_ON(wei->dimension(channel_idx) !=
+ (src->dimension(channel_idx) * attributes.depth_multiplier()));
ARM_COMPUTE_RETURN_ERROR_ON_MSG(wei->num_dimensions() > 3, "Weights can be at most 3 dimensional");
// dst shape is correct
- const PadStrideInfo pad_stride_info = PadStrideInfo(attributes.stride().x(), attributes.stride().y(),
- attributes.pad().left, attributes.pad().right,
- attributes.pad().top, attributes.pad().bottom,
- attributes.dimension_rounding_type());
- const ConvolutionInfo conv_info{ pad_stride_info, attributes.depth_multiplier(), ActivationLayerInfo(), attributes.dilation() };
- const TensorShape output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*src, *wei, conv_info);
+ const PadStrideInfo pad_stride_info =
+ PadStrideInfo(attributes.stride().x(), attributes.stride().y(), attributes.pad().left, attributes.pad().right,
+ attributes.pad().top, attributes.pad().bottom, attributes.dimension_rounding_type());
+ const ConvolutionInfo conv_info{pad_stride_info, attributes.depth_multiplier(), ActivationLayerInfo(),
+ attributes.dilation()};
+ const TensorShape output_shape = misc::shape_calculator::compute_depthwise_convolution_shape(*src, *wei, conv_info);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(dst->tensor_shape(), output_shape);
@@ -168,19 +169,22 @@ Status ClComponentDepthwiseConv2d::validate(
ARM_COMPUTE_RETURN_ERROR_ON(conv_info.pad_stride_info.stride().first > 1 && settings.m0() != 1);
ARM_COMPUTE_RETURN_ERROR_ON(conv_info.dilation.x() > 1 && settings.m0() != 1);
- if(conv_info.depth_multiplier > 1 && settings.n0() > 1)
+ if (conv_info.depth_multiplier > 1 && settings.n0() > 1)
{
ARM_COMPUTE_RETURN_ERROR_ON((conv_info.depth_multiplier % settings.n0()) != 0);
}
// Check export weights to cl image
- ARM_COMPUTE_RETURN_ERROR_ON_MSG((settings.export_weights_to_cl_image() == true) && (export_to_cl_image(wei) == false), "Weights cannot be exported to cl_image!");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG((settings.export_weights_to_cl_image() == true) &&
+ (export_to_cl_image(wei) == false),
+ "Weights cannot be exported to cl_image!");
ARM_COMPUTE_RETURN_ERROR_ON((settings.export_weights_to_cl_image() == true) && ((settings.n0() % 4) != 0));
- ARM_COMPUTE_RETURN_ERROR_ON(wei->dimension(channel_idx) != (src->dimension(channel_idx) * conv_info.depth_multiplier));
+ ARM_COMPUTE_RETURN_ERROR_ON(wei->dimension(channel_idx) !=
+ (src->dimension(channel_idx) * conv_info.depth_multiplier));
// bia shape is correct
- if(bia != nullptr)
+ if (bia != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON_MSG(bia->dimension(0) != output_shape[channel_idx],
"Biases size and number of dst feature maps should match");
@@ -198,14 +202,13 @@ Status ClComponentDepthwiseConv2d::validate(
return Status{};
}
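Editor's note: the dst-shape check above relies on the standard convolution output-extent formula, with the channel count scaled by the depth multiplier. A back-of-envelope version (floor rounding only; the real shape calculator also honours the configured rounding type):

#include <cstdio>

// Output extent for one spatial dimension (floor rounding).
unsigned int conv_out_dim(unsigned int in, unsigned int kernel, unsigned int pad_total,
                          unsigned int stride, unsigned int dilation)
{
    const unsigned int dilated_kernel = (kernel - 1) * dilation + 1;
    return (in + pad_total - dilated_kernel) / stride + 1;
}

int main()
{
    // 32x32 input, 3x3 kernel, pad 1+1, stride 1, dilation 1 -> 32x32 spatially;
    // the channel count scales by the depth multiplier instead.
    printf("out_w = %u\n", conv_out_dim(32, 3, 2, 1, 1));
    printf("out_c = %u\n", 16u * 2u); // in_c * depth_multiplier
    return 0;
}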
-ClComponentDepthwiseConv2d::ClComponentDepthwiseConv2d(
- ComponentId id,
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings)
- : IGpuKernelComponent{ id, properties, tensors },
- _component_writer{ std::make_unique<ClTemplateDepthwiseConv2d>(id, tensors, attributes, settings) }
+ClComponentDepthwiseConv2d::ClComponentDepthwiseConv2d(ComponentId id,
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings)
+ : IGpuKernelComponent{id, properties, tensors},
+ _component_writer{std::make_unique<ClTemplateDepthwiseConv2d>(id, tensors, attributes, settings)}
{
}
ClComponentDepthwiseConv2d::~ClComponentDepthwiseConv2d()
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h
index 0e2b5f14cb..b3e1bd222d 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h
@@ -25,7 +25,9 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTDEPTHWISECONV2D
#include "arm_compute/core/Error.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+
#include <memory>
namespace arm_compute
@@ -77,12 +79,12 @@ public:
unsigned int m0() const;
private:
- bool _export_input_to_cl_image{ false }; /**< Export input to cl_image */
- bool _export_weights_to_cl_image{ false }; /**< Export the weights to cl_image */
- bool _fast_relaxed_math{ true }; /**< Enable/disable -cl-fast-relaxed-math flag */
- bool _is_fma_available{ false }; /**< Is fma instruction available */
- unsigned int _n0{ 0 }; /**< Number of columns processed by each thread */
- unsigned int _m0{ 0 }; /**< Number of rows processed by each thread */
+ bool _export_input_to_cl_image{false}; /**< Export input to cl_image */
+ bool _export_weights_to_cl_image{false}; /**< Export the weights to cl_image */
+ bool _fast_relaxed_math{true}; /**< Enable/disable -cl-fast-relaxed-math flag */
+ bool _is_fma_available{false}; /**< Is fma instruction available */
+ unsigned int _n0{0}; /**< Number of columns processed by each thread */
+ unsigned int _m0{0}; /**< Number of rows processed by each thread */
};
/** Forward declaration */
@@ -127,22 +129,20 @@ public:
* |F16 |F16 |F16 |F16 |
* |F32 |F32 |F32 |F32 |
*/
- static Status validate(
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings);
+ static Status validate(const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings);
/** Constructor
*
* Similar to @ref ClComponentDepthwiseConv2d::validate()
*/
- ClComponentDepthwiseConv2d(
- ComponentId id,
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings);
+ ClComponentDepthwiseConv2d(ComponentId id,
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings);
/** Destructor */
~ClComponentDepthwiseConv2d() override;
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp
index a713c82003..98f3d6a882 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.cpp
@@ -23,8 +23,8 @@
*/
#include "ClComponentDirectConv2d.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/Validate.h"
#include "arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h"
#include "src/core/CL/CLValidate.h"
@@ -57,7 +57,8 @@ bool ClComponentDirectConv2dSettings::fast_relaxed_math() const
return _fast_relaxed_math;
}
-ClComponentDirectConv2dSettings &ClComponentDirectConv2dSettings::direct_conv_descriptor(const DirectConvComputeKernelInfo &desc)
+ClComponentDirectConv2dSettings &
+ClComponentDirectConv2dSettings::direct_conv_descriptor(const DirectConvComputeKernelInfo &desc)
{
_desc = desc;
return *this;
@@ -68,11 +69,10 @@ DirectConvComputeKernelInfo ClComponentDirectConv2dSettings::direct_conv_descrip
return _desc;
}
-Status ClComponentDirectConv2d::validate(
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings)
+Status ClComponentDirectConv2d::validate(const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings)
{
ARM_COMPUTE_UNUSED(properties);
const auto src = tensors.get_const_tensor(TensorType::ACL_SRC_0);
@@ -86,7 +86,7 @@ Status ClComponentDirectConv2d::validate(
// Matching data type
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, wei);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
- if(bia != nullptr)
+ if (bia != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, bia);
}
@@ -94,7 +94,7 @@ Status ClComponentDirectConv2d::validate(
// Matching data layout
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, wei);
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, dst);
- if(bia != nullptr)
+ if (bia != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_LAYOUT(src, bia);
}
@@ -103,7 +103,7 @@ Status ClComponentDirectConv2d::validate(
ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().total_size() == 0);
ARM_COMPUTE_RETURN_ERROR_ON(wei->tensor_shape().total_size() == 0);
ARM_COMPUTE_RETURN_ERROR_ON(dst->tensor_shape().total_size() == 0);
- if(bia != nullptr)
+ if (bia != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON(bia->tensor_shape().total_size() == 0);
}
@@ -112,22 +112,23 @@ Status ClComponentDirectConv2d::validate(
// wei shape is correct
const DataLayout data_layout = src->data_layout();
const int channel_idx = get_data_layout_dimension_index(data_layout, DataLayoutDimension::CHANNEL);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(wei->dimension(channel_idx) != src->dimension(channel_idx), "Weights feature map dimension should match the respective src's one");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(wei->dimension(channel_idx) != src->dimension(channel_idx),
+ "Weights feature map dimension should match the respective src's one");
ARM_COMPUTE_RETURN_ERROR_ON_MSG(wei->num_dimensions() > 4, "Weights can be at most 4 dimensional");
// dst shape is correct
- PadStrideInfo legacy_pad_stride(attributes.stride().x(), attributes.stride().y(), attributes.pad().left, attributes.pad().right, attributes.pad().top,
- attributes.pad().bottom, DimensionRoundingType{});
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(dst->tensor_shape(),
- misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, legacy_pad_stride));
+ PadStrideInfo legacy_pad_stride(attributes.stride().x(), attributes.stride().y(), attributes.pad().left,
+ attributes.pad().right, attributes.pad().top, attributes.pad().bottom,
+ DimensionRoundingType{});
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(
+ dst->tensor_shape(), misc::shape_calculator::compute_deep_convolution_shape(*src, *wei, legacy_pad_stride));
// bia shape is correct
- if(bia != nullptr)
+ if (bia != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON_MSG(bia->dimension(0) != wei->dimension(3),
"Biases size and number of dst feature maps should match");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(bia->num_dimensions() > 1,
- "Biases should be one dimensional");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(bia->num_dimensions() > 1, "Biases should be one dimensional");
}
// 2. Check support level
@@ -137,24 +138,25 @@ Status ClComponentDirectConv2d::validate(
ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC);
const auto desc = settings.direct_conv_descriptor();
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(desc.n0 != 1 && desc.n0 != 2 && desc.n0 != 3 && desc.n0 != 4 && desc.n0 != 8 && desc.n0 != 16,
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(desc.n0 != 1 && desc.n0 != 2 && desc.n0 != 3 && desc.n0 != 4 && desc.n0 != 8 &&
+ desc.n0 != 16,
"N0 can only be: 1, 2, 3, 4, 8, and 16");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(desc.k0 != 1 && desc.k0 != 2 && desc.k0 != 3 && desc.k0 != 4 && desc.k0 != 8 && desc.k0 != 16,
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(desc.k0 != 1 && desc.k0 != 2 && desc.k0 != 3 && desc.k0 != 4 && desc.k0 != 8 &&
+ desc.k0 != 16,
"K0 can only be: 1, 2, 3, 4, 8, and 16");
return Status{};
}
-ClComponentDirectConv2d::ClComponentDirectConv2d(
- ComponentId id,
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings)
- : IGpuKernelComponent{ id, properties, tensors },
+ClComponentDirectConv2d::ClComponentDirectConv2d(ComponentId id,
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings)
+ : IGpuKernelComponent{id, properties, tensors},
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer{ std::make_unique<ClTemplateDirectConv2d>(id, tensors, attributes, settings) }
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer{ std::make_unique<GpuCkwDirectConv2d>(id, tensors, attributes, settings) }
+ _component_writer{std::make_unique<ClTemplateDirectConv2d>(id, tensors, attributes, settings)}
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
+ _component_writer{std::make_unique<GpuCkwDirectConv2d>(id, tensors, attributes, settings)}
#endif // ACL_INTERNAL_TEST_CKW_IN_DF
{
}
@@ -165,7 +167,7 @@ ClComponentDirectConv2d::~ClComponentDirectConv2d()
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuTemplateComponentWriter *ClComponentDirectConv2d::template_writer() const
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuCkwComponentDriver *ClComponentDirectConv2d::ckw_component_driver() const
#endif // ACL_INTERNAL_TEST_CKW_IN_DF
{
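
The reflowed validate() above keeps the original constraint that N0 and K0 must each be one of 1, 2, 3, 4, 8 or 16. The same membership test, written as a small self-contained helper instead of a chained comparison (a sketch, not ACL code):

#include <cassert>
#include <initializer_list>

// True when v is one of the block sizes accepted by the checks above.
static bool is_valid_block_size(int v)
{
    for (int allowed : {1, 2, 3, 4, 8, 16})
    {
        if (v == allowed)
        {
            return true;
        }
    }
    return false;
}

int main()
{
    assert(is_valid_block_size(8));
    assert(!is_valid_block_size(5));
}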
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h
index 24acb1b2c1..d6d9705d3c 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h
@@ -26,7 +26,9 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/KernelDescriptors.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+
#include <memory>
namespace arm_compute
@@ -61,7 +63,7 @@ public:
DirectConvComputeKernelInfo direct_conv_descriptor() const;
private:
- bool _fast_relaxed_math{ true };
+ bool _fast_relaxed_math{true};
DirectConvComputeKernelInfo _desc{}; // Direct convolution descriptor
};
@@ -111,22 +113,20 @@ public:
* |F16 |F16 |F16 |F16 |
* |F32 |F32 |F32 |F32 |
*/
- static Status validate(
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings);
+ static Status validate(const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings);
/** Constructor
*
* Similar to @ref ClComponentDirectConv2d::validate()
*/
- ClComponentDirectConv2d(
- ComponentId id,
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings);
+ ClComponentDirectConv2d(ComponentId id,
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings);
/** Destructor */
~ClComponentDirectConv2d() override;
@@ -142,7 +142,7 @@ public:
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuTemplateComponentWriter *template_writer() const override;
#else // ACL_INTERNAL_TEST_CKW_IN_DF
- const IGpuCkwComponentDriver *ckw_component_driver() const override;
+ const IGpuCkwComponentDriver *ckw_component_driver() const override;
#endif // ACL_INTERNAL_TEST_CKW_IN_DF
/** Get component type */
GpuComponentType type() const override
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp
index 88d729170c..5b136427e4 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.cpp
@@ -24,6 +24,7 @@
#include "ClComponentElementwiseBinary.h"
#include "arm_compute/core/Validate.h"
+
#include "src/core/CL/CLValidate.h"
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h"
@@ -39,56 +40,55 @@ namespace dynamic_fusion
{
namespace
{
-std::set<ElementwiseBinaryCommonAttributes::ElementwiseOp> supported_ops
-{
- ElementwiseBinaryCommonAttributes::ElementwiseOp::Add,
- ElementwiseBinaryCommonAttributes::ElementwiseOp::Sub,
- ElementwiseBinaryCommonAttributes::ElementwiseOp::Mul
-};
+std::set<ElementwiseBinaryCommonAttributes::ElementwiseOp> supported_ops{
+ ElementwiseBinaryCommonAttributes::ElementwiseOp::Add, ElementwiseBinaryCommonAttributes::ElementwiseOp::Sub,
+ ElementwiseBinaryCommonAttributes::ElementwiseOp::Mul};
}
-Status ClComponentElementwiseBinary::validate(const ArgumentPack<ITensorInfo> &tensors, const ElementwiseBinaryCommonAttributes &attributes)
+Status ClComponentElementwiseBinary::validate(const ArgumentPack<ITensorInfo> &tensors,
+ const ElementwiseBinaryCommonAttributes &attributes)
{
const auto lhs = tensors.get_const_tensor(TensorType::ACL_SRC_0);
const auto rhs = tensors.get_const_tensor(TensorType::ACL_SRC_1);
const auto dst = tensors.get_const_tensor(TensorType::ACL_DST_0);
// Check operator type
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(supported_ops.find(attributes.operation()) == supported_ops.end(), "Provided Elementwise operation not supported.");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(supported_ops.find(attributes.operation()) == supported_ops.end(),
+ "Provided Elementwise operation not supported.");
// Check validity
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs, dst);
    // Check data type for different elementwise operators
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F32, DataType::F16, DataType::S32, DataType::S16, DataType::U8);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F32, DataType::F16, DataType::S32,
+ DataType::S16, DataType::U8);
// dst shape is correct
const TensorShape out_shape = TensorShape::broadcast_shape(lhs->tensor_shape(), rhs->tensor_shape());
ARM_COMPUTE_RETURN_ERROR_ON_MSG(out_shape.total_size() == 0, "Inputs are not broadcast compatible");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, dst->tensor_shape(), 0), "Wrong shape for dst.");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(out_shape, dst->tensor_shape(), 0),
+ "Wrong shape for dst.");
const auto &lhs_shape = lhs->tensor_shape();
const auto &rhs_shape = rhs->tensor_shape();
const auto &dst_shape = dst->tensor_shape();
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- detail::have_different_dimensions(lhs_shape, dst_shape, 0) && detail::have_different_dimensions(rhs_shape, dst_shape, 0),
- "Only LHS or RHS can be broadcasting, not both.");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(lhs_shape, dst_shape, 0) &&
+ detail::have_different_dimensions(rhs_shape, dst_shape, 0),
+ "Only LHS or RHS can be broadcasting, not both.");
// Dimension Y and Z are collapsed together in the current kernel implementation,
// hence they cannot be independently broadcast or non-broadcast.
// See: ClTemplateElementwiseBinary::get_window
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- (lhs_shape[1] != dst_shape[1] || rhs_shape[1] != dst_shape[1]) != (lhs_shape[2] != dst_shape[2] || rhs_shape[2] != dst_shape[2]),
- "Dimension Y and Z must both be either broadcast or non-broadcast.");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG((lhs_shape[1] != dst_shape[1] || rhs_shape[1] != dst_shape[1]) !=
+ (lhs_shape[2] != dst_shape[2] || rhs_shape[2] != dst_shape[2]),
+ "Dimension Y and Z must both be either broadcast or non-broadcast.");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- detail::have_different_dimensions(lhs_shape, dst_shape, 3),
- "LHS broadcast in dimension 3 or higher is not supported.");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(lhs_shape, dst_shape, 3),
+ "LHS broadcast in dimension 3 or higher is not supported.");
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(
- detail::have_different_dimensions(rhs_shape, dst_shape, 3),
- "RHS broadcast in dimension 3 or higher is not supported.");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(detail::have_different_dimensions(rhs_shape, dst_shape, 3),
+ "RHS broadcast in dimension 3 or higher is not supported.");
// Matching data type
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(lhs, rhs);
@@ -112,22 +112,15 @@ Status ClComponentElementwiseBinary::validate(const ArgumentPack<ITensorInfo> &t
ClComponentElementwiseBinary::~ClComponentElementwiseBinary()
{
}
-ClComponentElementwiseBinary::ClComponentElementwiseBinary(
- ComponentId id,
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
- : IGpuKernelComponent{ id, properties, tensors },
+ClComponentElementwiseBinary::ClComponentElementwiseBinary(ComponentId id,
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes)
+ : IGpuKernelComponent{id, properties, tensors},
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer
-{
- std::make_unique<ClTemplateElementwiseBinary>(id, tensors, attributes)
-}
+ _component_writer{std::make_unique<ClTemplateElementwiseBinary>(id, tensors, attributes)}
#else //ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer
-{
- std::make_unique<GpuCkwElementwiseBinary>(id, tensors, attributes)
-}
+ _component_writer{std::make_unique<GpuCkwElementwiseBinary>(id, tensors, attributes)}
#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
}
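
The reflowed checks in ClComponentElementwiseBinary::validate() encode three broadcast rules: only one operand may broadcast, dimensions Y and Z must broadcast together (they are collapsed in the kernel window), and no broadcast is allowed in dimension 3 or higher. A self-contained restatement with 4-element shapes standing in for TensorShape (illustrative names, not ACL code):

#include <array>
#include <cassert>
#include <cstddef>

using Shape4 = std::array<std::size_t, 4>; // stand-in for TensorShape

// True if a and b differ in any dimension >= first_dim (the role played by
// detail::have_different_dimensions above).
static bool differs_from(const Shape4 &a, const Shape4 &b, std::size_t first_dim)
{
    for (std::size_t d = first_dim; d < a.size(); ++d)
    {
        if (a[d] != b[d])
        {
            return true;
        }
    }
    return false;
}

static bool broadcast_rules_ok(const Shape4 &lhs, const Shape4 &rhs, const Shape4 &dst)
{
    // Only LHS or RHS may broadcast, not both.
    if (differs_from(lhs, dst, 0) && differs_from(rhs, dst, 0))
    {
        return false;
    }
    // Y and Z are collapsed in the kernel, so they must broadcast together.
    const bool y_broadcast = (lhs[1] != dst[1]) || (rhs[1] != dst[1]);
    const bool z_broadcast = (lhs[2] != dst[2]) || (rhs[2] != dst[2]);
    if (y_broadcast != z_broadcast)
    {
        return false;
    }
    // No broadcast in dimension 3 or higher on either side.
    return !differs_from(lhs, dst, 3) && !differs_from(rhs, dst, 3);
}

int main()
{
    assert(broadcast_rules_ok({8, 4, 4, 2}, {8, 1, 1, 2}, {8, 4, 4, 2}));  // RHS broadcasts Y and Z together
    assert(!broadcast_rules_ok({8, 1, 4, 2}, {8, 4, 4, 2}, {8, 4, 4, 2})); // Y broadcast without Z: rejected
}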
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h
index f7175903d0..7589b9732c 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h
@@ -82,17 +82,17 @@ public:
* |S16 |S16 |S16 |
* |U8 |U8 |U8 |
*/
- static Status validate(const ArgumentPack<ITensorInfo> &tensors, const ElementwiseBinaryCommonAttributes &attributes);
+ static Status validate(const ArgumentPack<ITensorInfo> &tensors,
+ const ElementwiseBinaryCommonAttributes &attributes);
/** Constructor
*
* Similar to @ref ClComponentElementwiseBinary::validate()
*/
- ClComponentElementwiseBinary(
- ComponentId id,
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes);
+ ClComponentElementwiseBinary(ComponentId id,
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes);
/** Destructor */
~ClComponentElementwiseBinary() override;
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp
index 279c77e227..27c13bd654 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.cpp
@@ -25,9 +25,10 @@
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h"
#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/Validate.h"
#include "arm_compute/dynamic_fusion/sketch/attributes/SoftmaxAttributes.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h"
@@ -37,10 +38,9 @@ namespace experimental
{
namespace dynamic_fusion
{
-Status ClComponentLogits1DMaxShiftExpSum::validate(
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
+Status ClComponentLogits1DMaxShiftExpSum::validate(const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes)
{
ARM_COMPUTE_UNUSED(properties, attributes);
@@ -75,8 +75,8 @@ ClComponentLogits1DMaxShiftExpSum::ClComponentLogits1DMaxShiftExpSum(ComponentId
const Properties &properties,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes)
- : IGpuKernelComponent{ id, properties, tensors },
- _component_writer{ std::make_unique<ClTemplateLogits1DMaxShiftExpSum>(id, tensors, attributes) }
+ : IGpuKernelComponent{id, properties, tensors},
+ _component_writer{std::make_unique<ClTemplateLogits1DMaxShiftExpSum>(id, tensors, attributes)}
{
}
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h
index b5db458248..91ab5de3b5 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h
@@ -25,6 +25,7 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTLOGITS1DMAXSHIFTEXPSUM
#include "arm_compute/dynamic_fusion/sketch/attributes/SoftmaxAttributes.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
namespace arm_compute
@@ -89,10 +90,8 @@ public:
* |F16 | F16 | F16 |
* |F32 | F32 | F32 |
*/
- static Status validate(
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes);
+ static Status
+ validate(const Properties &properties, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
/** Constructor
*
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp
index 7864d56d29..fb2544385c 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.cpp
@@ -25,9 +25,10 @@
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h"
#include "arm_compute/core/CL/CLHelpers.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/Validate.h"
#include "arm_compute/dynamic_fusion/sketch/attributes/SoftmaxAttributes.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.h"
@@ -37,10 +38,9 @@ namespace experimental
{
namespace dynamic_fusion
{
-Status ClComponentLogits1DNorm::validate(
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
+Status ClComponentLogits1DNorm::validate(const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes)
{
ARM_COMPUTE_UNUSED(properties, attributes);
@@ -77,8 +77,8 @@ ClComponentLogits1DNorm::ClComponentLogits1DNorm(ComponentId
const Properties &properties,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes)
- : IGpuKernelComponent{ id, properties, tensors },
- _component_writer{ std::make_unique<ClTemplateLogits1DNorm>(id, tensors, attributes) }
+ : IGpuKernelComponent{id, properties, tensors},
+ _component_writer{std::make_unique<ClTemplateLogits1DNorm>(id, tensors, attributes)}
{
}
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h
index 5bd350b9bd..74c0273604 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h
@@ -25,6 +25,7 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTLOGITS1DNORM
#include "arm_compute/dynamic_fusion/sketch/attributes/SoftmaxAttributes.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
namespace arm_compute
@@ -86,10 +87,8 @@ public:
* |F16 | F16 | F16 |
* |F32 | F32 | F32 |
*/
- static Status validate(
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes);
+ static Status
+ validate(const Properties &properties, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
/** Constructor
*
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp
index d415769094..409b191df5 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.cpp
@@ -24,13 +24,15 @@
#include "ClComponentPool2d.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/Validate.h"
#include "arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h"
#include "src/dynamic_fusion/utils/Utils.h"
+
#include <memory>
namespace arm_compute
@@ -39,23 +41,24 @@ namespace experimental
{
namespace dynamic_fusion
{
-Status ClComponentPool2d::validate(
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings)
+Status ClComponentPool2d::validate(const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings)
{
ARM_COMPUTE_UNUSED(properties);
const auto src = tensors.get_const_tensor(TensorType::ACL_SRC_0);
const auto dst = tensors.get_const_tensor(TensorType::ACL_DST_0);
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
- ARM_COMPUTE_ERROR_ON_MSG((attributes.pool_type() != PoolingType::AVG && attributes.pool_type() != PoolingType::MAX), "Unsupported Pooling type");
+ ARM_COMPUTE_ERROR_ON_MSG((attributes.pool_type() != PoolingType::AVG && attributes.pool_type() != PoolingType::MAX),
+ "Unsupported Pooling type");
// 1. Check validity
// Check if pooling is valid
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(is_pool_region_entirely_outside_input(convert_pool_attr_to_pool_info(attributes, settings.mixed_precision())),
- "Pooling region that is entirely outside input tensor is unsupported");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(
+ is_pool_region_entirely_outside_input(convert_pool_attr_to_pool_info(attributes, settings.mixed_precision())),
+ "Pooling region that is entirely outside input tensor is unsupported");
// Matching data type
ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DATA_TYPES(src, dst);
@@ -70,8 +73,9 @@ Status ClComponentPool2d::validate(
// Device requirements are met
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
- ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(dst->tensor_shape(),
- misc::shape_calculator::compute_pool_shape(*src, convert_pool_attr_to_pool_info(attributes, settings.mixed_precision())));
+ ARM_COMPUTE_RETURN_ERROR_ON_MISMATCHING_DIMENSIONS(
+ dst->tensor_shape(), misc::shape_calculator::compute_pool_shape(
+ *src, convert_pool_attr_to_pool_info(attributes, settings.mixed_precision())));
// 2. Check support level
// Data type
@@ -83,23 +87,16 @@ Status ClComponentPool2d::validate(
return Status{};
}
-ClComponentPool2d::ClComponentPool2d(
- ComponentId id,
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings)
- : IGpuKernelComponent{ id, properties, tensors },
+ClComponentPool2d::ClComponentPool2d(ComponentId id,
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings)
+ : IGpuKernelComponent{id, properties, tensors},
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer
-{
- std::make_unique<ClTemplatePool2d>(id, tensors, attributes, settings)
-}
+ _component_writer{std::make_unique<ClTemplatePool2d>(id, tensors, attributes, settings)}
#else //ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer
-{
- std::make_unique<GpuCkwPool2d>(id, tensors, attributes, settings)
-}
+ _component_writer{std::make_unique<GpuCkwPool2d>(id, tensors, attributes, settings)}
#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
}
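
ClComponentPool2d::validate() above rejects pooling regions that fall entirely outside the input. A sketch of one way that situation arises, under the assumption (for illustration only; the real helper inspects a full PoolingLayerInfo) that a window is outside the input when it fits inside the padding on some axis:

#include <algorithm>
#include <cassert>

// Hypothetical predicate: the first pooling window covers padding only when
// the window does not exceed the padding on either side of an axis.
static bool pool_region_entirely_outside_input(unsigned int pool_w, unsigned int pool_h,
                                               unsigned int pad_left, unsigned int pad_right,
                                               unsigned int pad_top, unsigned int pad_bottom)
{
    const bool outside_x = pool_w <= std::max(pad_left, pad_right);
    const bool outside_y = pool_h <= std::max(pad_top, pad_bottom);
    return outside_x || outside_y;
}

int main()
{
    assert(pool_region_entirely_outside_input(2, 2, 3, 0, 0, 0));  // 2x2 window inside 3-wide padding
    assert(!pool_region_entirely_outside_input(3, 3, 1, 1, 1, 1)); // always overlaps the input
}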
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h
index 6814bf9243..98fed65004 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h
@@ -25,6 +25,7 @@
#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTPOOL2D_H
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
namespace arm_compute
@@ -82,11 +83,10 @@ public:
* |F16 |F16 |
* |F32 |F32 |
*/
- static Status validate(
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings);
+ static Status validate(const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings);
/** Constructor
*
@@ -96,12 +96,11 @@ public:
* @param[in] attributes Component attributes
* @param[in] settings Component settings
*/
- ClComponentPool2d(
- ComponentId id,
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes,
- const Settings &settings);
+ ClComponentPool2d(ComponentId id,
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes,
+ const Settings &settings);
/** Destructor */
~ClComponentPool2d() override;
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp
index 66e2ee6956..0ece9de970 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.cpp
@@ -22,8 +22,10 @@
* SOFTWARE.
*/
#include "ClComponentReshape.h"
+
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Validate.h"
+
#include "src/core/CL/CLValidate.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h"
@@ -49,12 +51,10 @@ Status ClComponentReshape::validate(const ArgumentPack<ITensorInfo> &tensors)
return Status{};
}
-ClComponentReshape::ClComponentReshape(
- ComponentId id,
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors)
- : IGpuKernelComponent{ id, properties, tensors },
- _component_writer{ std::make_unique<ClTemplateReshape>(id, tensors) }
+ClComponentReshape::ClComponentReshape(ComponentId id,
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors)
+ : IGpuKernelComponent{id, properties, tensors}, _component_writer{std::make_unique<ClTemplateReshape>(id, tensors)}
{
}
ClComponentReshape::~ClComponentReshape()
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h
index f8d165b4c8..78163d6603 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h
@@ -73,10 +73,7 @@ public:
* @param[in] properties Component properties @ref Properties
* @param[in] tensors Tensor arguments to the component
*/
- ClComponentReshape(
- ComponentId id,
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors);
+ ClComponentReshape(ComponentId id, const Properties &properties, const ArgumentPack<ITensorInfo> &tensors);
/** Destructor */
~ClComponentReshape() override;
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp
index 6df1d9b3db..b05eb04698 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.cpp
@@ -66,7 +66,9 @@ Status ClComponentResize::validate(const IGpuKernelComponent::Properties &proper
ARM_COMPUTE_RETURN_ERROR_ON_F16_UNSUPPORTED(src);
// Align corners and sampling policy conformance
- ARM_COMPUTE_RETURN_ERROR_ON(attributes.align_corners() && !arm_compute::scale_utils::is_align_corners_allowed_sampling_policy(attributes.sampling_policy()));
+ ARM_COMPUTE_RETURN_ERROR_ON(
+ attributes.align_corners() &&
+ !arm_compute::scale_utils::is_align_corners_allowed_sampling_policy(attributes.sampling_policy()));
// All tensor infos are initialized
ARM_COMPUTE_RETURN_ERROR_ON(src->tensor_shape().total_size() == 0);
@@ -79,11 +81,11 @@ ClComponentResize::ClComponentResize(ComponentId id,
const IGpuKernelComponent::Properties &properties,
const ArgumentPack<ITensorInfo> &tensors,
const ClComponentResize::Attributes &attributes)
- : IGpuKernelComponent{ id, properties, tensors },
+ : IGpuKernelComponent{id, properties, tensors},
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer{ std::make_unique<ClTemplateResize>(id, tensors, attributes) }
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer{ std::make_unique<GpuCkwResize>(id, tensors, attributes) }
+ _component_writer{std::make_unique<ClTemplateResize>(id, tensors, attributes)}
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
+ _component_writer{std::make_unique<GpuCkwResize>(id, tensors, attributes)}
#endif // ACL_INTERNAL_TEST_CKW_IN_DF
{
}
@@ -94,7 +96,7 @@ ClComponentResize::~ClComponentResize()
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuTemplateComponentWriter *ClComponentResize::template_writer() const
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuCkwComponentDriver *ClComponentResize::ckw_component_driver() const
#endif // ACL_INTERNAL_TEST_CKW_IN_DF
{
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h
index 474524f8fc..29276c3257 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h
@@ -26,6 +26,7 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTRESIZE
#include "arm_compute/dynamic_fusion/sketch/attributes/ResizeAttributes.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
namespace arm_compute
@@ -43,7 +44,7 @@ class ArgumentPack;
/** Forward declaration */
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
class ClTemplateResize;
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
class GpuCkwResize;
#endif // ACL_INTERNAL_TEST_CKW_IN_DF
@@ -82,10 +83,8 @@ public:
* |U8 |U8 |
* |S16 |S16 |
*/
- static Status validate(
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes);
+ static Status
+ validate(const Properties &properties, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
/** Constructor
*
@@ -114,7 +113,7 @@ public:
/** Get writer for the component */
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuTemplateComponentWriter *template_writer() const override;
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
const IGpuCkwComponentDriver *ckw_component_driver() const override;
#endif // ACL_INTERNAL_TEST_CKW_IN_DF
@@ -127,7 +126,7 @@ public:
private:
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
std::unique_ptr<ClTemplateResize> _component_writer;
-#else // ACL_INTERNAL_TEST_CKW_IN_DF
+#else // ACL_INTERNAL_TEST_CKW_IN_DF
std::unique_ptr<GpuCkwResize> _component_writer;
#endif // ACL_INTERNAL_TEST_CKW_IN_DF
};
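
Every component touched by this patch selects its writer backend at compile time via ACL_INTERNAL_TEST_CKW_IN_DF, as in the _component_writer members above. The pattern in isolation, with stand-in types for the template writer and the CKW driver:

#include <cassert>
#include <memory>

struct TemplateWriter {}; // stand-in for e.g. ClTemplateResize
struct CkwDriver {};      // stand-in for e.g. GpuCkwResize

class Component
{
public:
    bool has_writer() const { return _component_writer != nullptr; }

private:
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
    std::unique_ptr<TemplateWriter> _component_writer{std::make_unique<TemplateWriter>()};
#else  // ACL_INTERNAL_TEST_CKW_IN_DF
    std::unique_ptr<CkwDriver> _component_writer{std::make_unique<CkwDriver>()};
#endif // ACL_INTERNAL_TEST_CKW_IN_DF
};

int main()
{
    Component c; // which writer type exists is decided entirely at compile time
    assert(c.has_writer());
}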
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp
index 12b81c3d56..dcbecaff35 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.cpp
@@ -38,25 +38,19 @@ namespace experimental
{
namespace dynamic_fusion
{
-Status ClComponentStore::validate(
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors)
+Status ClComponentStore::validate(const Properties &properties, const ArgumentPack<ITensorInfo> &tensors)
{
ARM_COMPUTE_UNUSED(properties, tensors);
return Status{};
}
-ClComponentStore::ClComponentStore(ComponentId id, const Properties &properties, const ArgumentPack<ITensorInfo> &tensors)
- : IGpuKernelComponent{ id, properties, tensors },
+ClComponentStore::ClComponentStore(ComponentId id,
+ const Properties &properties,
+ const ArgumentPack<ITensorInfo> &tensors)
+ : IGpuKernelComponent{id, properties, tensors},
#ifndef ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer
-{
- std::make_unique<ClTemplateStore>(id, tensors)
-}
+ _component_writer{std::make_unique<ClTemplateStore>(id, tensors)}
#else //ACL_INTERNAL_TEST_CKW_IN_DF
- _component_writer
-{
- std::make_unique<GpuCkwStore>(id, tensors)
-}
+ _component_writer{std::make_unique<GpuCkwStore>(id, tensors)}
#endif //ACL_INTERNAL_TEST_CKW_IN_DF
{
}
diff --git a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h
index 853ee39012..948785c480 100644
--- a/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h
+++ b/src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h
@@ -25,6 +25,7 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_COMPONENTS_CL_CLCOMPONENTSTORE
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
+
#include <memory>
namespace arm_compute
@@ -70,9 +71,7 @@ public:
* |:--------------|:--------------|
* |All |All |
*/
- static Status validate(
- const Properties &properties,
- const ArgumentPack<ITensorInfo> &tensors);
+ static Status validate(const Properties &properties, const ArgumentPack<ITensorInfo> &tensors);
/** Constructor
*
* Similar to @ref ClComponentStore::validate()
diff --git a/src/dynamic_fusion/sketch/gpu/components/utils/type_printer/ElementwiseBinary.h b/src/dynamic_fusion/sketch/gpu/components/utils/type_printer/ElementwiseBinary.h
index bc7133f4df..4c3e84e59d 100644
--- a/src/dynamic_fusion/sketch/gpu/components/utils/type_printer/ElementwiseBinary.h
+++ b/src/dynamic_fusion/sketch/gpu/components/utils/type_printer/ElementwiseBinary.h
@@ -46,18 +46,16 @@ using namespace experimental::dynamic_fusion;
*/
inline ::std::ostream &operator<<(::std::ostream &os, const ClComponentElementwiseBinary::Attributes::ElementwiseOp &op)
{
- const std::map<ClComponentElementwiseBinary::Attributes::ElementwiseOp, std::string> op_name =
- {
- { ClComponentElementwiseBinary::Attributes::ElementwiseOp::Add, "add" },
- { ClComponentElementwiseBinary::Attributes::ElementwiseOp::Div, "div" },
- { ClComponentElementwiseBinary::Attributes::ElementwiseOp::Max, "max" },
- { ClComponentElementwiseBinary::Attributes::ElementwiseOp::Min, "min" },
- { ClComponentElementwiseBinary::Attributes::ElementwiseOp::Mul, "mul" },
- { ClComponentElementwiseBinary::Attributes::ElementwiseOp::Power, "power" },
- { ClComponentElementwiseBinary::Attributes::ElementwiseOp::Prelu, "prelu" },
- { ClComponentElementwiseBinary::Attributes::ElementwiseOp::SquaredDiff, "squareddiff" },
- { ClComponentElementwiseBinary::Attributes::ElementwiseOp::Sub, "sub" }
- };
+ const std::map<ClComponentElementwiseBinary::Attributes::ElementwiseOp, std::string> op_name = {
+ {ClComponentElementwiseBinary::Attributes::ElementwiseOp::Add, "add"},
+ {ClComponentElementwiseBinary::Attributes::ElementwiseOp::Div, "div"},
+ {ClComponentElementwiseBinary::Attributes::ElementwiseOp::Max, "max"},
+ {ClComponentElementwiseBinary::Attributes::ElementwiseOp::Min, "min"},
+ {ClComponentElementwiseBinary::Attributes::ElementwiseOp::Mul, "mul"},
+ {ClComponentElementwiseBinary::Attributes::ElementwiseOp::Power, "power"},
+ {ClComponentElementwiseBinary::Attributes::ElementwiseOp::Prelu, "prelu"},
+ {ClComponentElementwiseBinary::Attributes::ElementwiseOp::SquaredDiff, "squareddiff"},
+ {ClComponentElementwiseBinary::Attributes::ElementwiseOp::Sub, "sub"}};
os << op_name.at(op);
return os;
}
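
The operator<< above resolves each enum value through std::map::at, which throws for an unmapped operator rather than printing garbage. A minimal usage sketch with a stand-in enum (not the ACL types; a real caller would include this type_printer header instead):

#include <iostream>
#include <map>
#include <sstream>
#include <string>

enum class ElementwiseOp { Add, Sub, Mul }; // illustrative subset

std::ostream &operator<<(std::ostream &os, ElementwiseOp op)
{
    static const std::map<ElementwiseOp, std::string> op_name = {
        {ElementwiseOp::Add, "add"}, {ElementwiseOp::Sub, "sub"}, {ElementwiseOp::Mul, "mul"}};
    return os << op_name.at(op); // throws std::out_of_range for an unmapped value
}

int main()
{
    std::ostringstream ss;
    ss << ElementwiseOp::Mul;
    std::cout << ss.str() << '\n'; // prints: mul
}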
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp
index e7ee1c10df..2cec67dc65 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuAdd.cpp
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuAdd.h"
+
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
#include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h"
@@ -32,12 +33,11 @@ namespace experimental
{
namespace dynamic_fusion
{
-Status GpuAdd::validate_op(const GpuWorkloadSketch &sketch,
- const ITensorInfo *lhs,
- const ITensorInfo *rhs)
+Status GpuAdd::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *lhs, const ITensorInfo *rhs)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8, DataType::S16, DataType::S32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8,
+ DataType::S16, DataType::S32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type");
// Set the elementwise operation to Add then call the elementwise common validate_op
@@ -46,12 +46,11 @@ Status GpuAdd::validate_op(const GpuWorkloadSketch &sketch,
return GpuElementwiseBinaryCommon::validate_op(sketch, lhs, rhs, common_attributes);
}
-Status GpuAdd::is_supported_op(const GpuWorkloadContext &context,
- const ITensorInfo *lhs,
- const ITensorInfo *rhs)
+Status GpuAdd::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *lhs, const ITensorInfo *rhs)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8, DataType::S16, DataType::S32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8,
+ DataType::S16, DataType::S32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type");
// Set the elementwise operation to Add then call the elementwise common is_supported_op
@@ -60,9 +59,7 @@ Status GpuAdd::is_supported_op(const GpuWorkloadContext &context,
return GpuElementwiseBinaryCommon::is_supported_op(context, lhs, rhs, common_attributes);
}
-ITensorInfo *GpuAdd::create_op(GpuWorkloadSketch &sketch,
- ITensorInfo *lhs,
- ITensorInfo *rhs)
+ITensorInfo *GpuAdd::create_op(GpuWorkloadSketch &sketch, ITensorInfo *lhs, ITensorInfo *rhs)
{
// No need to log or validate as they'll be handled inside GpuElementwiseBinaryCommon::create_op()
// Set the elementwise operation to Add then call the elementwise common create_op
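
GpuAdd above never implements the elementwise semantics itself: each entry point pins the operation to Add on a common attributes object and forwards to GpuElementwiseBinaryCommon. The delegation pattern in miniature (stand-in types; the trivial common_validate body is an assumption for illustration):

#include <iostream>

enum class ElementwiseOp { Add, Sub, Mul };

struct CommonAttributes
{
    CommonAttributes &operation(ElementwiseOp op)
    {
        _op = op;
        return *this;
    }
    ElementwiseOp _op{ElementwiseOp::Add};
};

// Shared validation path; trivially accepts everything in this sketch.
static bool common_validate(const CommonAttributes &)
{
    return true;
}

// Operator-specific wrapper: pin the operation, then delegate.
static bool add_validate()
{
    CommonAttributes common_attributes{};
    common_attributes.operation(ElementwiseOp::Add);
    return common_validate(common_attributes);
}

int main()
{
    std::cout << std::boolalpha << add_validate() << '\n'; // true
}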
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp
index 33c2d43e07..6f35e66ea8 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuCast.cpp
@@ -23,12 +23,11 @@
*/
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuCast.h"
+#include "src/common/utils/Log.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentCast.h"
-
-#include "src/common/utils/Log.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
namespace arm_compute
{
@@ -49,7 +48,7 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
TensorInfo dst_info_to_validate;
const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate;
- if(dst != nullptr)
+ if (dst != nullptr)
{
dst_info_to_validate_ptr = dst;
}
@@ -58,25 +57,22 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
// Check support level
// Data Type
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src,
- 1,
- DataType::U8, DataType::S8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL, DataType::S16,
- DataType::U16, DataType::U32, DataType::S32, DataType::F16,
- DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst_info_to_validate_ptr,
- 1,
- DataType::U8, DataType::S8, DataType::QASYMM8, DataType::S16,
- DataType::U16, DataType::U32, DataType::S32, DataType::F16,
- DataType::F32);
-
- if(context.gpu_language() == GpuLanguage::OpenCL)
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(
+ src, 1, DataType::U8, DataType::S8, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::QSYMM8_PER_CHANNEL,
+ DataType::S16, DataType::U16, DataType::U32, DataType::S32, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(dst_info_to_validate_ptr, 1, DataType::U8, DataType::S8,
+ DataType::QASYMM8, DataType::S16, DataType::U16, DataType::U32,
+ DataType::S32, DataType::F16, DataType::F32);
+
+ if (context.gpu_language() == GpuLanguage::OpenCL)
{
const auto cl_compile_ctx = context.cl_compile_context();
ARM_COMPUTE_RETURN_ERROR_ON(cl_compile_ctx == nullptr);
// Validate Cast Component
{
- const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
- auto settings = ClComponentCast::Settings();
+ const auto properties =
+ IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
+ auto settings = ClComponentCast::Settings();
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC_0, src);
@@ -94,16 +90,13 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
constexpr GpuOperatorType operator_type = GpuOperatorType::Simple;
} // namespace
-Status GpuCast::is_supported_op(const GpuWorkloadContext &context,
- const ITensorInfo *src,
- const CastAttributes &attributes)
+Status
+GpuCast::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *src, const CastAttributes &attributes)
{
return is_supported_op_helper(context, src, nullptr, attributes);
}
-Status GpuCast::validate_op(const GpuWorkloadSketch &sketch,
- const ITensorInfo *src,
- const CastAttributes &attributes)
+Status GpuCast::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, const CastAttributes &attributes)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id());
@@ -127,9 +120,7 @@ Status GpuCast::validate_op(const GpuWorkloadSketch &sketch,
return is_supported_op_helper(*sketch.gpu_context(), src, &dst_info_to_validate, attributes);
}
-ITensorInfo *GpuCast::create_op(GpuWorkloadSketch &sketch,
- ITensorInfo *src,
- const CastAttributes &attributes)
+ITensorInfo *GpuCast::create_op(GpuWorkloadSketch &sketch, ITensorInfo *src, const CastAttributes &attributes)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src);
ARM_COMPUTE_LOG_PARAMS(src, attributes);
@@ -145,14 +136,15 @@ ITensorInfo *GpuCast::create_op(GpuWorkloadSketch &sketch,
GpuKernelComponentGraph &comp_graph = sketch.implementation().component_graph();
const auto *sketch_ctx = sketch.implementation().context();
- if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
+ if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
{
ARM_COMPUTE_ERROR_ON(sketch_ctx->cl_compile_context() == nullptr);
        // Add Cast Component
{
- const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
- auto settings = ClComponentCast::Settings();
+ const auto properties =
+ IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
+ auto settings = ClComponentCast::Settings();
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC_0, src);
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp
index 89b533c9b8..697b7d4e1f 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuClamp.cpp
@@ -25,14 +25,13 @@
#include "arm_compute/core/experimental/Types.h"
+#include "src/common/utils/Log.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h"
-#include "src/common/utils/Log.h"
-
namespace arm_compute
{
namespace experimental
@@ -48,12 +47,13 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32);
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(attributes.max_val() < attributes.min_val(), "Maximum clamp value cannot be lower than minimum value");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(attributes.max_val() < attributes.min_val(),
+ "Maximum clamp value cannot be lower than minimum value");
TensorInfo dst_info_to_validate;
const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate;
- if(dst != nullptr)
+ if (dst != nullptr)
{
dst_info_to_validate_ptr = dst;
}
@@ -61,16 +61,15 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
auto_init_if_empty(dst_info_to_validate, *src->clone());
// CLAMP operator is implemented as LU_BOUNDED_RELU with the alpha and beta variables swapped
- const ClComponentActivation::Attributes act_info
- {
- ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, attributes.max_val(), attributes.min_val()
- };
+ const ClComponentActivation::Attributes act_info{ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
+ attributes.max_val(), attributes.min_val()};
// Check components
- if(context.gpu_language() == GpuLanguage::OpenCL)
+ if (context.gpu_language() == GpuLanguage::OpenCL)
{
// Validate Activation Component
- const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ const auto properties =
+ IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC, src);
@@ -87,16 +86,13 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
constexpr GpuOperatorType operator_type = GpuOperatorType::Simple;
} // namespace
-Status GpuClamp::is_supported_op(const GpuWorkloadContext &context,
- const ITensorInfo *src,
- const ClampAttributes &attributes)
+Status
+GpuClamp::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *src, const ClampAttributes &attributes)
{
return is_supported_op_helper(context, src, nullptr, attributes);
}
-Status GpuClamp::validate_op(const GpuWorkloadSketch &sketch,
- const ITensorInfo *src,
- const ClampAttributes &attributes)
+Status GpuClamp::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, const ClampAttributes &attributes)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
@@ -121,9 +117,7 @@ Status GpuClamp::validate_op(const GpuWorkloadSketch &sketch,
return is_supported_op_helper(*sketch.gpu_context(), src, &dst_info_to_validate, attributes);
}
-ITensorInfo *GpuClamp::create_op(GpuWorkloadSketch &sketch,
- ITensorInfo *src,
- const ClampAttributes &attributes)
+ITensorInfo *GpuClamp::create_op(GpuWorkloadSketch &sketch, ITensorInfo *src, const ClampAttributes &attributes)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src);
ARM_COMPUTE_LOG_PARAMS(src, attributes);
@@ -139,18 +133,16 @@ ITensorInfo *GpuClamp::create_op(GpuWorkloadSketch &sketch,
GpuKernelComponentGraph &comp_graph = sketch.implementation().component_graph();
// CLAMP operator is implemented as LU_BOUNDED_RELU with the alpha and beta variables swapped
- const ClComponentActivation::Attributes act_info
- {
- ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU, attributes.max_val(), attributes.min_val()
- };
+ const ClComponentActivation::Attributes act_info{ActivationLayerInfo::ActivationFunction::LU_BOUNDED_RELU,
+ attributes.max_val(), attributes.min_val()};
const auto *const sketch_ctx = sketch.implementation().context();
- if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
+ if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
{
// Add Activation Component
auto properties = IGpuKernelComponent::Properties();
- properties.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ properties.stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC, src);
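
The comment in GpuClamp above says CLAMP lowers to LU_BOUNDED_RELU with the alpha and beta variables swapped. Since LU_BOUNDED_RELU(x; alpha, beta) = min(alpha, max(beta, x)), passing alpha = max_val and beta = min_val clamps x into [min_val, max_val]. A standalone check of that identity (plain C++, not the ACL activation kernel):

#include <algorithm>
#include <cassert>

// LU_BOUNDED_RELU(x; alpha, beta) = min(alpha, max(beta, x)).
static float lu_bounded_relu(float x, float alpha, float beta)
{
    return std::min(alpha, std::max(beta, x));
}

// Clamp expressed through it, with alpha = max_val and beta = min_val:
// exactly the swap described in the comment above.
static float clamp_via_activation(float x, float min_val, float max_val)
{
    return lu_bounded_relu(x, /*alpha=*/max_val, /*beta=*/min_val);
}

int main()
{
    assert(clamp_via_activation(-3.0f, -1.0f, 1.0f) == -1.0f); // clipped to min
    assert(clamp_via_activation(0.5f, -1.0f, 1.0f) == 0.5f);   // passed through
    assert(clamp_via_activation(7.0f, -1.0f, 1.0f) == 1.0f);   // clipped to max
}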
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp
index cb270ed4b0..aaeec543f8 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuConv2d.cpp
@@ -24,15 +24,15 @@
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuConv2d.h"
#include "arm_compute/core/KernelDescriptors.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/Validate.h"
#include "arm_compute/runtime/CL/CLScheduler.h"
#include "src/common/utils/Log.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/gpu/cl/kernels/gemm/ClGemmHelpers.h"
#include "src/runtime/heuristics/direct_conv/ClDirectConvKernelConfig.h"
#include "src/runtime/heuristics/direct_conv/IClDirectConvKernelConfig.h"
@@ -45,24 +45,30 @@ namespace dynamic_fusion
{
namespace
{
-DirectConvComputeKernelInfo config_direct_convolution_nhwc(const ITensorInfo *src, const ITensorInfo *weights, const PadStrideInfo &conv_info)
+DirectConvComputeKernelInfo
+config_direct_convolution_nhwc(const ITensorInfo *src, const ITensorInfo *weights, const PadStrideInfo &conv_info)
{
// Get GPU target
GPUTarget gpu_target = CLScheduler::get().target();
- std::unique_ptr<arm_compute::cl_direct_conv::IClDirectConvKernelConfig> t = arm_compute::cl_direct_conv::ClDirectConvKernelConfigurationFactory::create(gpu_target);
+ std::unique_ptr<arm_compute::cl_direct_conv::IClDirectConvKernelConfig> t =
+ arm_compute::cl_direct_conv::ClDirectConvKernelConfigurationFactory::create(gpu_target);
return t->configure(src, weights, conv_info);
}
-void calculate_and_init_dst_if_empty(ITensorInfo *dst, const ITensorInfo *src, const ITensorInfo *wei, const Conv2dAttributes &attributes)
+void calculate_and_init_dst_if_empty(ITensorInfo *dst,
+ const ITensorInfo *src,
+ const ITensorInfo *wei,
+ const Conv2dAttributes &attributes)
{
- if(dst->total_size() == 0U)
+ if (dst->total_size() == 0U)
{
- const auto shape = misc::shape_calculator::compute_deep_convolution_shape(src->tensor_shape(), src->data_layout(), wei->tensor_shape(),
- PadStrideInfo(attributes.stride().x(), attributes.stride().y(), attributes.pad().left,
- attributes.pad().right,
- attributes.pad().top, attributes.pad().bottom, DimensionRoundingType::FLOOR)); // use the default DimensionRoundingType
+ const auto shape = misc::shape_calculator::compute_deep_convolution_shape(
+ src->tensor_shape(), src->data_layout(), wei->tensor_shape(),
+ PadStrideInfo(attributes.stride().x(), attributes.stride().y(), attributes.pad().left,
+ attributes.pad().right, attributes.pad().top, attributes.pad().bottom,
+ DimensionRoundingType::FLOOR)); // use the default DimensionRoundingType
auto_init_if_empty(*dst, src->clone()->set_tensor_shape(shape));
}
@@ -83,7 +89,7 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
TensorInfo dst_info_to_validate;
const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate;
- if(dst != nullptr)
+ if (dst != nullptr)
{
dst_info_to_validate_ptr = dst;
}
@@ -98,18 +104,20 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
// Check components
const auto gpu_target = context.gpu_target();
- if(context.gpu_language() == GpuLanguage::OpenCL)
+ if (context.gpu_language() == GpuLanguage::OpenCL)
{
const auto cl_compile_ctx = context.cl_compile_context();
ARM_COMPUTE_RETURN_ERROR_ON(cl_compile_ctx == nullptr);
// Validate Direct Conv2d Component
{
- const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
- auto settings = ClComponentDirectConv2d::Settings();
+ const auto properties =
+ IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
+ auto settings = ClComponentDirectConv2d::Settings();
settings.fast_relaxed_math(
- (gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST)
- && (dst_info_to_validate_ptr->data_type() == DataType::F32 || dst_info_to_validate_ptr->data_type() == DataType::F16));
+ (gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST) &&
+ (dst_info_to_validate_ptr->data_type() == DataType::F32 ||
+ dst_info_to_validate_ptr->data_type() == DataType::F16));
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC_0, src);
@@ -142,14 +150,14 @@ Status GpuConv2d::validate_op(const GpuWorkloadSketch &sketch,
const ITensorInfo *src,
const ITensorInfo *wei,
const ITensorInfo *bia,
- const Conv2dAttributes &attributes)
+ const Conv2dAttributes &attributes)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, wei);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(!wei->are_values_constant(), "Dynamic weights are not supported");
// Check if tensors have valid id. I.e. they are created from a sketch
ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !wei->has_valid_id());
- if(bia != nullptr)
+ if (bia != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON(!bia->has_valid_id());
}
@@ -178,16 +186,13 @@ Status GpuConv2d::validate_op(const GpuWorkloadSketch &sketch,
return is_supported_op_helper(*sketch.gpu_context(), src, wei, bia, &dst_info_to_validate, attributes);
}
-ITensorInfo *GpuConv2d::create_op(GpuWorkloadSketch &sketch,
- ITensorInfo *src,
- ITensorInfo *wei,
- ITensorInfo *bia,
- const Conv2dAttributes &attributes)
+ITensorInfo *GpuConv2d::create_op(
+ GpuWorkloadSketch &sketch, ITensorInfo *src, ITensorInfo *wei, ITensorInfo *bia, const Conv2dAttributes &attributes)
{
ARM_COMPUTE_LOG_PARAMS(src, wei, bia, attributes);
PadStrideInfo conv_info(attributes.stride().x(), attributes.stride().y(), attributes.pad().left,
- attributes.pad().right,
- attributes.pad().top, attributes.pad().bottom, DimensionRoundingType::FLOOR);
+ attributes.pad().right, attributes.pad().top, attributes.pad().bottom,
+ DimensionRoundingType::FLOOR);
// Initialize the direct convolution descriptor
const DirectConvComputeKernelInfo desc = config_direct_convolution_nhwc(src, wei, conv_info);
@@ -207,7 +212,7 @@ ITensorInfo *GpuConv2d::create_op(GpuWorkloadSketch &sketch,
const auto gpu_target = sketch_ctx->gpu_target();
- if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
+ if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
{
const auto cl_compile_ctx = sketch_ctx->cl_compile_context();
ARM_COMPUTE_ERROR_ON(cl_compile_ctx == nullptr);
@@ -216,17 +221,17 @@ ITensorInfo *GpuConv2d::create_op(GpuWorkloadSketch &sketch,
// Add Direct Conv2d Component
{
auto properties = IGpuKernelComponent::Properties();
- properties.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ properties.stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
auto settings = ClComponentDirectConv2d::Settings();
settings.fast_relaxed_math(
- (gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST)
- && (dst->data_type() == DataType::F32 || dst->data_type() == DataType::F16));
+ (gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST) &&
+ (dst->data_type() == DataType::F32 || dst->data_type() == DataType::F16));
settings.direct_conv_descriptor(desc);
- if(settings.export_to_cl_image())
+ if (settings.export_to_cl_image())
{
arm_compute::opencl::kernels::gemm::update_padding_for_cl_image(wei);
}
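
create_op above builds its PadStrideInfo with DimensionRoundingType::FLOOR before computing the deep-convolution output shape. Per spatial dimension that is the standard formula out = floor((in + pad_before + pad_after - kernel) / stride) + 1, sketched here (illustrative helper, not the ACL shape calculator):

#include <cassert>

// Output extent of one spatial dimension with FLOOR rounding, matching the
// default DimensionRoundingType used above; compute_deep_convolution_shape
// applies the equivalent computation per spatial dimension.
static unsigned int conv_out_dim_floor(
    unsigned int in, unsigned int kernel, unsigned int stride, unsigned int pad_before, unsigned int pad_after)
{
    return (in + pad_before + pad_after - kernel) / stride + 1; // integer division == floor
}

int main()
{
    // e.g. a 7-wide input, 3-wide kernel, stride 2, no padding -> 3-wide output.
    assert(conv_out_dim_floor(7, 3, 2, 0, 0) == 3);
}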
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp
index c72098e943..e2b673bd43 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuDepthwiseConv2d.cpp
@@ -28,8 +28,8 @@
#include "src/common/utils/Log.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/gpu/cl/kernels/gemm/ClGemmHelpers.h"
#include "src/runtime/heuristics/dwc_native/ClDWCNativeKernelConfig.h"
#include "src/runtime/heuristics/dwc_native/IClDWCNativeKernelConfig.h"
@@ -42,20 +42,20 @@ namespace dynamic_fusion
{
namespace
{
-void calculate_and_init_dst_if_empty(ITensorInfo *dst, const ITensorInfo *src, const ITensorInfo *wei, const DepthwiseConv2dAttributes &attributes)
+void calculate_and_init_dst_if_empty(ITensorInfo *dst,
+ const ITensorInfo *src,
+ const ITensorInfo *wei,
+ const DepthwiseConv2dAttributes &attributes)
{
- if(dst->total_size() == 0U)
+ if (dst->total_size() == 0U)
{
- const PadStrideInfo pad_stride_info(attributes.stride().x(),
- attributes.stride().y(),
- attributes.pad().left,
- attributes.pad().right,
- attributes.pad().top,
- attributes.pad().bottom,
+ const PadStrideInfo pad_stride_info(attributes.stride().x(), attributes.stride().y(), attributes.pad().left,
+ attributes.pad().right, attributes.pad().top, attributes.pad().bottom,
attributes.dimension_rounding_type());
- const ConvolutionInfo conv_info{ pad_stride_info, attributes.depth_multiplier(), ActivationLayerInfo(), attributes.dilation() };
- const TensorShape shape = misc::shape_calculator::compute_depthwise_convolution_shape(*src, *wei, conv_info);
+ const ConvolutionInfo conv_info{pad_stride_info, attributes.depth_multiplier(), ActivationLayerInfo(),
+ attributes.dilation()};
+ const TensorShape shape = misc::shape_calculator::compute_depthwise_convolution_shape(*src, *wei, conv_info);
auto_init_if_empty(*dst, src->clone()->set_tensor_shape(shape));
}
@@ -76,7 +76,7 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
TensorInfo dst_info_to_validate;
const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate;
- if(dst != nullptr)
+ if (dst != nullptr)
{
dst_info_to_validate_ptr = dst;
}
@@ -91,40 +91,44 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
const GpuTarget gpu_target = context.gpu_target();
- if(context.gpu_language() == GpuLanguage::OpenCL)
+ if (context.gpu_language() == GpuLanguage::OpenCL)
{
const CLCompileContext *cl_compile_ctx = context.cl_compile_context();
ARM_COMPUTE_RETURN_ERROR_ON(cl_compile_ctx == nullptr);
// Validate Depthwise Conv2d Component
{
- const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
- auto settings = ClComponentDepthwiseConv2d::Settings();
+ const auto properties =
+ IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
+ auto settings = ClComponentDepthwiseConv2d::Settings();
- const PadStrideInfo legacy_conv_info(attributes.stride().x(), attributes.stride().y(), attributes.pad().left,
- attributes.pad().right,
- attributes.pad().top, attributes.pad().bottom, DimensionRoundingType::FLOOR);
+ const PadStrideInfo legacy_conv_info(attributes.stride().x(), attributes.stride().y(),
+ attributes.pad().left, attributes.pad().right, attributes.pad().top,
+ attributes.pad().bottom, DimensionRoundingType::FLOOR);
// Get the depthwise convolution compute parameters
- auto t = arm_compute::cl_dwc::ClDWCNativeKernelConfigurationFactory::create(gpu_target);
- const DWCComputeKernelInfo dwc_info = t->configure(src, wei, legacy_conv_info, attributes.dilation(), attributes.depth_multiplier());
+ auto t = arm_compute::cl_dwc::ClDWCNativeKernelConfigurationFactory::create(gpu_target);
+ const DWCComputeKernelInfo dwc_info =
+ t->configure(src, wei, legacy_conv_info, attributes.dilation(), attributes.depth_multiplier());
settings.fast_relaxed_math(
- (gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST)
- && (dst_info_to_validate_ptr->data_type() == DataType::F32 || dst_info_to_validate_ptr->data_type() == DataType::F16));
+ (gpu_target != GPUTarget::G71 && (gpu_target & GPUTarget::GPU_ARCH_MASK) == GPUTarget::BIFROST) &&
+ (dst_info_to_validate_ptr->data_type() == DataType::F32 ||
+ dst_info_to_validate_ptr->data_type() == DataType::F16));
settings.is_fma_available(get_arch_from_target(gpu_target) == GPUTarget::MIDGARD)
- .m0(dwc_info.m0)
- .n0(dwc_info.n0)
- .export_input_to_cl_image(dwc_info.export_input_to_cl_image)
- .export_weights_to_cl_image(dwc_info.export_weights_to_cl_image);
+ .m0(dwc_info.m0)
+ .n0(dwc_info.n0)
+ .export_input_to_cl_image(dwc_info.export_input_to_cl_image)
+ .export_weights_to_cl_image(dwc_info.export_weights_to_cl_image);
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC_0, src);
arguments.add_const_tensor(ACL_SRC_1, wei);
arguments.add_const_tensor(ACL_SRC_2, bia);
arguments.add_const_tensor(ACL_DST_0, dst_info_to_validate_ptr);
- ARM_COMPUTE_RETURN_ON_ERROR(ClComponentDepthwiseConv2d::validate(properties, arguments, attributes, settings));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ ClComponentDepthwiseConv2d::validate(properties, arguments, attributes, settings));
}
}
else
@@ -158,7 +162,7 @@ Status GpuDepthwiseConv2d::validate_op(const GpuWorkloadSketch &sketch,
ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !wei->has_valid_id());
- if(bia != nullptr)
+ if (bia != nullptr)
{
ARM_COMPUTE_RETURN_ERROR_ON(!bia->has_valid_id());
}
@@ -205,35 +209,37 @@ ITensorInfo *GpuDepthwiseConv2d::create_op(GpuWorkloadSketch &sket
const auto *sketch_ctx = sketch.implementation().context();
const GpuTarget gpu_target = sketch_ctx->gpu_target();
- if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
+ if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(sketch_ctx->cl_compile_context());
// Add Depthwise Conv2d Component
{
- const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
- auto settings = ClComponentDepthwiseConv2d::Settings();
+ const auto properties =
+ IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
+ auto settings = ClComponentDepthwiseConv2d::Settings();
- const PadStrideInfo legacy_conv_info(attributes.stride().x(), attributes.stride().y(), attributes.pad().left,
- attributes.pad().right,
- attributes.pad().top, attributes.pad().bottom, DimensionRoundingType::FLOOR);
+ const PadStrideInfo legacy_conv_info(attributes.stride().x(), attributes.stride().y(),
+ attributes.pad().left, attributes.pad().right, attributes.pad().top,
+ attributes.pad().bottom, DimensionRoundingType::FLOOR);
// Get the depthwise convolution compute parameters
- auto t = arm_compute::cl_dwc::ClDWCNativeKernelConfigurationFactory::create(gpu_target);
- const DWCComputeKernelInfo dwc_info = t->configure(src, wei, legacy_conv_info, attributes.dilation(), attributes.depth_multiplier());
+ auto t = arm_compute::cl_dwc::ClDWCNativeKernelConfigurationFactory::create(gpu_target);
+ const DWCComputeKernelInfo dwc_info =
+ t->configure(src, wei, legacy_conv_info, attributes.dilation(), attributes.depth_multiplier());
settings.is_fma_available(get_arch_from_target(gpu_target) != GPUTarget::MIDGARD)
- .m0(dwc_info.m0)
- .n0(dwc_info.n0)
- .export_input_to_cl_image(dwc_info.export_input_to_cl_image)
- .export_weights_to_cl_image(dwc_info.export_weights_to_cl_image);
+ .m0(dwc_info.m0)
+ .n0(dwc_info.n0)
+ .export_input_to_cl_image(dwc_info.export_input_to_cl_image)
+ .export_weights_to_cl_image(dwc_info.export_weights_to_cl_image);
- if(settings.export_input_to_cl_image())
+ if (settings.export_input_to_cl_image())
{
arm_compute::opencl::kernels::gemm::update_padding_for_cl_image(src);
}
- if(settings.export_weights_to_cl_image())
+ if (settings.export_weights_to_cl_image())
{
arm_compute::opencl::kernels::gemm::update_padding_for_cl_image(wei);
}
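
The settings chain above (is_fma_available(...).m0(...).n0(...)...) works because every setter returns *this. A small self-contained sketch of the same fluent style, using a hypothetical struct rather than ACL's ClComponentDepthwiseConv2d::Settings:

struct DwcSettings
{
    DwcSettings &m0(int v)                          { m0_ = v; return *this; }
    DwcSettings &n0(int v)                          { n0_ = v; return *this; }
    DwcSettings &export_weights_to_cl_image(bool b) { export_weights_ = b; return *this; }

    int  m0_{0};
    int  n0_{0};
    bool export_weights_{false};
};

int main()
{
    DwcSettings s;
    s.m0(4).n0(8).export_weights_to_cl_image(true); // each setter returns *this, so calls chain
    return (s.m0_ == 4 && s.n0_ == 8 && s.export_weights_) ? 0 : 1;
}
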
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp
index 464a32cbad..b871171e8d 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuMul.cpp
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuMul.h"
+
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
#include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h"
@@ -32,9 +33,7 @@ namespace experimental
{
namespace dynamic_fusion
{
-Status GpuMul::validate_op(const GpuWorkloadSketch &sketch,
- const ITensorInfo *lhs,
- const ITensorInfo *rhs)
+Status GpuMul::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *lhs, const ITensorInfo *rhs)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32);
@@ -46,9 +45,7 @@ Status GpuMul::validate_op(const GpuWorkloadSketch &sketch,
return GpuElementwiseBinaryCommon::validate_op(sketch, lhs, rhs, common_attributes);
}
-Status GpuMul::is_supported_op(const GpuWorkloadContext &context,
- const ITensorInfo *lhs,
- const ITensorInfo *rhs)
+Status GpuMul::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *lhs, const ITensorInfo *rhs)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32);
@@ -60,9 +57,7 @@ Status GpuMul::is_supported_op(const GpuWorkloadContext &context,
return GpuElementwiseBinaryCommon::is_supported_op(context, lhs, rhs, common_attributes);
}
-ITensorInfo *GpuMul::create_op(GpuWorkloadSketch &sketch,
- ITensorInfo *lhs,
- ITensorInfo *rhs)
+ITensorInfo *GpuMul::create_op(GpuWorkloadSketch &sketch, ITensorInfo *lhs, ITensorInfo *rhs)
{
// Set the elementwise operation to Mul then call the elementwise common create_op
ElementwiseBinaryCommonAttributes common_attributes{};
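
GpuMul, like GpuSub further down in this patch, is a thin front-end: it only selects the operation on the shared attributes object and forwards to GpuElementwiseBinaryCommon. A standalone sketch of that delegation pattern with illustrative stand-in types:

enum class ElementwiseOp { Add, Sub, Mul };

struct CommonAttributes
{
    CommonAttributes &operation(ElementwiseOp op) { op_ = op; return *this; }
    ElementwiseOp op_{ElementwiseOp::Add};
};

// Stand-in for GpuElementwiseBinaryCommon::create_op, which does the real work.
int create_common(const CommonAttributes &attrs) { return static_cast<int>(attrs.op_); }

// Thin front-end mirroring GpuMul::create_op: pick the op, then delegate.
int create_mul()
{
    CommonAttributes common_attributes{};
    common_attributes.operation(ElementwiseOp::Mul);
    return create_common(common_attributes);
}

int main() { return create_mul() == 2 ? 0 : 1; }
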
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp
index 107a5e5fa7..f0d368d757 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuOutput.cpp
@@ -26,10 +26,9 @@
#include "src/common/utils/Log.h"
#include "src/core/helpers/AutoConfiguration.h"
-
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentStore.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/dynamic_fusion/utils/Utils.h"
namespace arm_compute
@@ -43,9 +42,7 @@ namespace
constexpr GpuOperatorType operator_type = GpuOperatorType::Simple;
} // namespace
-Status GpuOutput::is_supported_op(const GpuWorkloadContext &context,
- const ITensorInfo *src,
- const ITensorInfo *dst)
+Status GpuOutput::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *src, const ITensorInfo *dst)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
@@ -60,9 +57,7 @@ Status GpuOutput::is_supported_op(const GpuWorkloadContext &context,
return Status{};
}
-Status GpuOutput::validate_op(const GpuWorkloadSketch &sketch,
- const ITensorInfo *src,
- const ITensorInfo *dst)
+Status GpuOutput::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, const ITensorInfo *dst)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id());
@@ -90,9 +85,7 @@ Status GpuOutput::validate_op(const GpuWorkloadSketch &sketch,
return status;
}
-void GpuOutput::create_op(GpuWorkloadSketch &sketch,
- ITensorInfo *src,
- ITensorInfo *dst)
+void GpuOutput::create_op(GpuWorkloadSketch &sketch, ITensorInfo *src, ITensorInfo *dst)
{
ARM_COMPUTE_LOG_PARAMS(src, dst);
ARM_COMPUTE_ERROR_THROW_ON(GpuOutput::validate_op(sketch, src, dst));
@@ -104,14 +97,14 @@ void GpuOutput::create_op(GpuWorkloadSketch &sketch,
auto &comp_graph = sketch.implementation().component_graph();
const auto sketch_ctx = sketch.implementation().context();
- if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
+ if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
{
ARM_COMPUTE_ERROR_ON(sketch_ctx->cl_compile_context() == nullptr);
// Add store component
{
IGpuKernelComponent::Properties properties;
- properties.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ properties.stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC_0, src);
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp
index 7ecfa0158b..55c604aacc 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuPool2d.cpp
@@ -22,20 +22,21 @@
* SOFTWARE.
*/
+#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h"
+
#include "arm_compute/core/CL/CLCompileContext.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+#include "arm_compute/core/Validate.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadContext.h"
-
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
-#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h"
+
#include "src/common/utils/Log.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h"
#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSourceCode.h"
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h"
#include "src/dynamic_fusion/utils/Utils.h"
namespace arm_compute
@@ -46,11 +47,15 @@ namespace dynamic_fusion
{
namespace
{
-void calculate_and_init_dst_if_empty(ITensorInfo *dst, const ITensorInfo *src, const Pool2dAttributes &attributes, const GpuPool2dSettings &settings)
+void calculate_and_init_dst_if_empty(ITensorInfo *dst,
+ const ITensorInfo *src,
+ const Pool2dAttributes &attributes,
+ const GpuPool2dSettings &settings)
{
- if(dst->total_size() == 0U)
+ if (dst->total_size() == 0U)
{
- auto shape = misc::shape_calculator::compute_pool_shape(*src, convert_pool_attr_to_pool_info(attributes, settings.mixed_precision()));
+ auto shape = misc::shape_calculator::compute_pool_shape(
+ *src, convert_pool_attr_to_pool_info(attributes, settings.mixed_precision()));
auto_init_if_empty(*dst, src->clone()->set_tensor_shape(shape));
}
}
@@ -82,7 +87,7 @@ bool GpuPool2dSettings::use_inf_as_limit() const
Status GpuPool2d::validate_op(const GpuWorkloadSketch &sketch,
const ITensorInfo *src,
- const Pool2dAttributes &attributes,
+ const Pool2dAttributes &attributes,
const GpuPool2dSettings &settings)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
@@ -110,7 +115,7 @@ Status GpuPool2d::validate_op(const GpuWorkloadSketch &sketch,
Status GpuPool2d::is_supported_op(const GpuWorkloadContext &context,
const ITensorInfo *src,
const Pool2dAttributes &attributes,
- const GpuPool2dSettings &settings)
+ const GpuPool2dSettings &settings)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
// Data type
@@ -118,7 +123,8 @@ Status GpuPool2d::is_supported_op(const GpuWorkloadContext &context,
// Data layout
ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC);
// Check exclude padding is not false
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(!attributes.exclude_padding(), "Exclude padding must be set to true in Attributes!");
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(!attributes.exclude_padding(),
+ "Exclude padding must be set to true in Attributes!");
// Auto initialize dst tensor info
TensorInfo dst_info_to_validate;
@@ -126,14 +132,15 @@ Status GpuPool2d::is_supported_op(const GpuWorkloadContext &context,
calculate_and_init_dst_if_empty(&dst_info_to_validate, src, attributes, settings);
// Check components
- if(context.gpu_language() == GpuLanguage::OpenCL)
+ if (context.gpu_language() == GpuLanguage::OpenCL)
{
const auto cl_compile_ctx = context.cl_compile_context();
ARM_COMPUTE_RETURN_ERROR_ON(cl_compile_ctx == nullptr);
// Validate Component
{
- const KernelProperties properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ const KernelProperties properties =
+ IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC_0, src);
@@ -148,10 +155,10 @@ Status GpuPool2d::is_supported_op(const GpuWorkloadContext &context,
return Status{};
}
-ITensorInfo *GpuPool2d::create_op(GpuWorkloadSketch &sketch,
- ITensorInfo *src,
- const Pool2dAttributes &attributes,
- const GpuPool2dSettings &settings)
+ITensorInfo *GpuPool2d::create_op(GpuWorkloadSketch &sketch,
+ ITensorInfo *src,
+ const Pool2dAttributes &attributes,
+ const GpuPool2dSettings &settings)
{
// Assert validation
ARM_COMPUTE_ERROR_THROW_ON(GpuPool2d::validate_op(sketch, src, attributes, settings));
@@ -168,7 +175,7 @@ ITensorInfo *GpuPool2d::create_op(GpuWorkloadSketch &sketch,
const auto sketch_ctx = sketch.implementation().context();
- if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
+ if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
{
const auto cl_compile_ctx = sketch_ctx->cl_compile_context();
ARM_COMPUTE_UNUSED(cl_compile_ctx);
@@ -177,7 +184,7 @@ ITensorInfo *GpuPool2d::create_op(GpuWorkloadSketch &sketch,
// Add Component
{
auto properties = IGpuKernelComponent::Properties();
- properties.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ properties.stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC_0, src);
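
calculate_and_init_dst_if_empty above shows the auto-initialisation idiom used by every operator in this patch: the destination info is only derived when the caller left it empty (total_size() == 0). A stand-in sketch, with a plain struct in place of ITensorInfo and of the shape calculator:

#include <cstddef>
#include <vector>

struct TensorStub // minimal stand-in for ITensorInfo
{
    std::vector<std::size_t> shape; // empty => not configured yet

    std::size_t total_size() const
    {
        std::size_t n = shape.empty() ? 0 : 1;
        for (std::size_t d : shape)
            n *= d;
        return n;
    }
};

void init_dst_if_empty(TensorStub &dst, const std::vector<std::size_t> &computed_shape)
{
    if (dst.total_size() == 0U) // leave dst untouched if the caller already configured it
    {
        dst.shape = computed_shape;
    }
}

int main()
{
    TensorStub dst{};
    init_dst_if_empty(dst, {8, 8, 3}); // in the real code the shape comes from compute_pool_shape()
    return dst.total_size() == 192 ? 0 : 1;
}
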
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp
index 0f43a578df..3def7a1a81 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuReshape.cpp
@@ -22,12 +22,14 @@
* SOFTWARE.
*/
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuReshape.h"
+
#include "arm_compute/core/Error.h"
+
#include "src/common/utils/Log.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
namespace arm_compute
{
@@ -40,14 +42,14 @@ namespace
Status is_supported_op_helper(const GpuWorkloadContext &context,
const ITensorInfo *src,
const ITensorInfo *dst,
- const ReshapeAttributes &attributes)
+ const ReshapeAttributes &attributes)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
TensorInfo dst_info_to_validate;
const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate;
- if(dst != nullptr)
+ if (dst != nullptr)
{
dst_info_to_validate_ptr = dst;
}
@@ -55,7 +57,7 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
auto_init_if_empty(dst_info_to_validate, src->clone()->set_tensor_shape(attributes.shape()));
// Check components
- if(context.gpu_language() == GpuLanguage::OpenCL)
+ if (context.gpu_language() == GpuLanguage::OpenCL)
{
const auto cl_compile_ctx = context.cl_compile_context();
ARM_COMPUTE_RETURN_ERROR_ON(cl_compile_ctx == nullptr);
@@ -78,16 +80,13 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
GpuOperatorType operator_type = GpuOperatorType::Complex;
} // namespace
-Status GpuReshape::is_supported_op(const GpuWorkloadContext &context,
- const ITensorInfo *src,
- const Attributes &attributes)
+Status
+GpuReshape::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *src, const Attributes &attributes)
{
return is_supported_op_helper(context, src, nullptr, attributes);
}
-Status GpuReshape::validate_op(const GpuWorkloadSketch &sketch,
- const ITensorInfo *src,
- const Attributes &attributes)
+Status GpuReshape::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, const Attributes &attributes)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id());
@@ -111,9 +110,7 @@ Status GpuReshape::validate_op(const GpuWorkloadSketch &sketch,
return is_supported_op_helper(*sketch.gpu_context(), src, &dst_info_to_validate, attributes);
}
-ITensorInfo *GpuReshape::create_op(GpuWorkloadSketch &sketch,
- ITensorInfo *src,
- const Attributes &attributes)
+ITensorInfo *GpuReshape::create_op(GpuWorkloadSketch &sketch, ITensorInfo *src, const Attributes &attributes)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src);
ARM_COMPUTE_LOG_PARAMS(src, attributes.shape());
@@ -127,7 +124,7 @@ ITensorInfo *GpuReshape::create_op(GpuWorkloadSketch &sketch,
// Translate into components and add to component graph
auto &comp_graph = sketch.implementation().component_graph();
const auto sketch_ctx = sketch.implementation().context();
- if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
+ if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
{
const auto cl_compile_ctx = sketch_ctx->cl_compile_context();
ARM_COMPUTE_UNUSED(cl_compile_ctx);
@@ -136,7 +133,7 @@ ITensorInfo *GpuReshape::create_op(GpuWorkloadSketch &sketch,
// Add ElementwiseBinary Component
{
auto properties = IGpuKernelComponent::Properties();
- properties.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ properties.stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC_0, src);
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp
index 5f52eea7d0..fb09875b33 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuResize.cpp
@@ -26,12 +26,12 @@
#include "arm_compute/core/Error.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
+
+#include "src/common/utils/Log.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentResize.h"
-
-#include "src/common/utils/Log.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
namespace arm_compute
{
@@ -43,7 +43,7 @@ namespace
{
void calculate_and_init_dst_if_empty(ITensorInfo *dst, const ITensorInfo *src, const ResizeAttributes &attributes)
{
- if(dst->total_size() == 0U)
+ if (dst->total_size() == 0U)
{
TensorShape out_shape = src->tensor_shape();
@@ -64,7 +64,7 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
TensorInfo dst_info_to_validate;
const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate;
- if(dst != nullptr)
+ if (dst != nullptr)
{
dst_info_to_validate_ptr = dst;
}
@@ -73,22 +73,25 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
// Check support level
// Data type
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED, DataType::U8, DataType::S16, DataType::F16, DataType::F32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::QASYMM8, DataType::QASYMM8_SIGNED,
+ DataType::U8, DataType::S16, DataType::F16, DataType::F32);
// Data layout
ARM_COMPUTE_RETURN_ERROR_ON_DATA_LAYOUT_NOT_IN(src, DataLayout::NHWC);
// Interpolation policy
- ARM_COMPUTE_RETURN_ERROR_ON_MSG(attributes.interpolation_policy() != InterpolationPolicy::NEAREST_NEIGHBOR && attributes.interpolation_policy() != InterpolationPolicy::BILINEAR,
+ ARM_COMPUTE_RETURN_ERROR_ON_MSG(attributes.interpolation_policy() != InterpolationPolicy::NEAREST_NEIGHBOR &&
+ attributes.interpolation_policy() != InterpolationPolicy::BILINEAR,
"Interpolation policy must be NEAREST_NEIGHBOR or BILINEAR");
// Check components
- if(context.gpu_language() == GpuLanguage::OpenCL)
+ if (context.gpu_language() == GpuLanguage::OpenCL)
{
const auto cl_compile_ctx = context.cl_compile_context();
ARM_COMPUTE_RETURN_ERROR_ON(cl_compile_ctx == nullptr);
// Validate Activation Component
{
- const KernelProperties properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ const KernelProperties properties =
+ IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC_0, src);
@@ -107,16 +110,14 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
constexpr GpuOperatorType operator_type = GpuOperatorType::Complex;
} // namespace
-Status GpuResize::is_supported_op(const GpuWorkloadContext &context,
- const ITensorInfo *src,
- const Attributes &attributes)
+Status
+GpuResize::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *src, const Attributes &attributes)
{
return is_supported_op_helper(context, src, nullptr, attributes);
}
-Status GpuResize::validate_op(const GpuWorkloadSketch &sketch,
- const ITensorInfo *src,
- const GpuResize::Attributes &attributes)
+Status
+GpuResize::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src, const GpuResize::Attributes &attributes)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id());
@@ -141,9 +142,7 @@ Status GpuResize::validate_op(const GpuWorkloadSketch &sketch,
return is_supported_op_helper(*sketch.gpu_context(), src, &dst_info_to_validate, attributes);
}
-ITensorInfo *GpuResize::create_op(GpuWorkloadSketch &sketch,
- ITensorInfo *src,
- const GpuResize::Attributes &attributes)
+ITensorInfo *GpuResize::create_op(GpuWorkloadSketch &sketch, ITensorInfo *src, const GpuResize::Attributes &attributes)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src);
ARM_COMPUTE_LOG_PARAMS(src, attributes);
@@ -159,13 +158,14 @@ ITensorInfo *GpuResize::create_op(GpuWorkloadSketch &sketch,
GpuKernelComponentGraph &comp_graph = sketch.implementation().component_graph();
const auto *sketch_ctx = sketch.implementation().context();
- if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
+ if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(sketch_ctx->cl_compile_context());
// Add Resize Component
{
- const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ const auto properties =
+ IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC_0, src);
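
GpuResize, like GpuReshape, GpuSigmoid and GpuTanh in this patch, funnels both is_supported_op and validate_op through one helper; passing dst == nullptr makes the helper validate against a locally auto-initialised tensor info instead. A stand-in sketch of that dispatch (types and the final check are illustrative only):

struct InfoStub // stand-in for ITensorInfo
{
    int shape{0};
};

bool helper(const InfoStub *src, const InfoStub *dst) // is_supported_op_helper stand-in
{
    InfoStub        local{};              // dst_info_to_validate
    const InfoStub *to_validate = &local;
    if (dst != nullptr)                   // caller supplied a dst: validate against it
    {
        to_validate = dst;
    }
    local.shape = src->shape;             // auto_init_if_empty stand-in
    return to_validate->shape >= 0;       // the real helper validates the component here
}

bool is_supported(const InfoStub *src)                  { return helper(src, nullptr); }
bool validate(const InfoStub *src, const InfoStub *dst) { return helper(src, dst); }

int main()
{
    InfoStub src{4};
    return (is_supported(&src) && validate(&src, &src)) ? 0 : 1;
}
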
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuSigmoid.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuSigmoid.cpp
index 09debad969..a2260c8c36 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuSigmoid.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuSigmoid.cpp
@@ -23,14 +23,15 @@
*/
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSigmoid.h"
+
#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/function_info/ActivationLayerInfo.h"
#include "src/common/utils/Log.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
namespace arm_compute
{
@@ -40,9 +41,7 @@ namespace dynamic_fusion
{
namespace
{
-Status is_supported_op_helper(const GpuWorkloadContext &context,
- const ITensorInfo *src,
- const ITensorInfo *dst)
+Status is_supported_op_helper(const GpuWorkloadContext &context, const ITensorInfo *src, const ITensorInfo *dst)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32);
@@ -50,20 +49,21 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
TensorInfo dst_info_to_validate;
const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate;
- if(dst != nullptr)
+ if (dst != nullptr)
{
dst_info_to_validate_ptr = dst;
}
auto_init_if_empty(dst_info_to_validate, *src->clone());
- const ClComponentActivation::Attributes act_info{ ActivationLayerInfo::ActivationFunction::LOGISTIC };
+ const ClComponentActivation::Attributes act_info{ActivationLayerInfo::ActivationFunction::LOGISTIC};
// Check components
- if(context.gpu_language() == GpuLanguage::OpenCL)
+ if (context.gpu_language() == GpuLanguage::OpenCL)
{
// Validate Activation Component
- const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ const auto properties =
+ IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC, src);
@@ -80,14 +80,12 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
constexpr GpuOperatorType operator_type = GpuOperatorType::Simple;
} // namespace
-Status GpuSigmoid::is_supported_op(const GpuWorkloadContext &context,
- const ITensorInfo *src)
+Status GpuSigmoid::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *src)
{
return is_supported_op_helper(context, src, nullptr);
}
-Status GpuSigmoid::validate_op(const GpuWorkloadSketch &sketch,
- const ITensorInfo *src)
+Status GpuSigmoid::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
@@ -112,8 +110,7 @@ Status GpuSigmoid::validate_op(const GpuWorkloadSketch &sketch,
return is_supported_op_helper(*sketch.gpu_context(), src, &dst_info_to_validate);
}
-ITensorInfo *GpuSigmoid::create_op(GpuWorkloadSketch &sketch,
- ITensorInfo *src)
+ITensorInfo *GpuSigmoid::create_op(GpuWorkloadSketch &sketch, ITensorInfo *src)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src);
ARM_COMPUTE_LOG_PARAMS(src);
@@ -128,15 +125,15 @@ ITensorInfo *GpuSigmoid::create_op(GpuWorkloadSketch &sketch,
// Translate into components and add to component graph
GpuKernelComponentGraph &comp_graph = sketch.implementation().component_graph();
- const ClComponentActivation::Attributes act_info{ ActivationLayerInfo::ActivationFunction::LOGISTIC };
+ const ClComponentActivation::Attributes act_info{ActivationLayerInfo::ActivationFunction::LOGISTIC};
const auto *const sketch_ctx = sketch.implementation().context();
- if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
+ if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
{
// Add Activation Component
auto properties = IGpuKernelComponent::Properties();
- properties.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ properties.stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC, src);
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp
index ffc4553a7d..c87b282aec 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.cpp
@@ -22,13 +22,14 @@
* SOFTWARE.
*/
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSoftmax.h"
+
#include "arm_compute/core/Error.h"
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h"
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h"
#include "src/common/utils/Log.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DMaxShiftExpSum.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentLogits1DNorm.h"
#include "src/dynamic_fusion/sketch/gpu/GpuOperatorProperties.h"
#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
@@ -52,7 +53,7 @@ Status GpuSoftmax::is_supported_op(const GpuWorkloadContext &context,
TensorInfo dst_info_to_validate;
// Auto initialize dst tensor info
- if(dst != nullptr)
+ if (dst != nullptr)
{
dst_info_to_validate = *dst;
}
@@ -61,11 +62,12 @@ Status GpuSoftmax::is_supported_op(const GpuWorkloadContext &context,
auto_init_if_empty(dst_info_to_validate, *src->clone());
}
// Check components
- if(context.gpu_language() == GpuLanguage::OpenCL)
+ if (context.gpu_language() == GpuLanguage::OpenCL)
{
const auto cl_compile_ctx = context.cl_compile_context();
ARM_COMPUTE_RETURN_ERROR_ON(cl_compile_ctx == nullptr);
- const KernelProperties properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ const KernelProperties properties =
+ IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
TensorShape logits_sum_shape = src->tensor_shape();
TensorInfo logits(src->clone()->set_tensor_shape(logits_sum_shape));
@@ -86,7 +88,8 @@ Status GpuSoftmax::is_supported_op(const GpuWorkloadContext &context,
arguments_norm.add_const_tensor(ACL_SRC_1, &sum);
arguments_norm.add_const_tensor(ACL_DST_0, &dst_info_to_validate);
- ARM_COMPUTE_RETURN_ON_ERROR(ClComponentLogits1DMaxShiftExpSum::validate(properties, arguments_exp_sum, attributes));
+ ARM_COMPUTE_RETURN_ON_ERROR(
+ ClComponentLogits1DMaxShiftExpSum::validate(properties, arguments_exp_sum, attributes));
ARM_COMPUTE_RETURN_ON_ERROR(ClComponentLogits1DNorm::validate(properties, arguments_norm, attributes));
}
else
@@ -105,14 +108,16 @@ Status GpuSoftmax::validate_op(const GpuWorkloadSketch &sketch,
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
ARM_COMPUTE_RETURN_ERROR_ON(!src->has_valid_id() || !dst->has_valid_id());
ARM_COMPUTE_RETURN_ERROR_ON_MSG(src->num_dimensions() > 4, "Only up to 4 dimensions are supported");
- ARM_COMPUTE_RETURN_ERROR_ON(attributes.axis() < static_cast<int32_t>(-src->num_dimensions()) || static_cast<int32_t>(src->num_dimensions()) <= attributes.axis());
+ ARM_COMPUTE_RETURN_ERROR_ON(attributes.axis() < static_cast<int32_t>(-src->num_dimensions()) ||
+ static_cast<int32_t>(src->num_dimensions()) <= attributes.axis());
// Auto initialize dst tensor info
TensorInfo dst_info_to_validate = *dst;
auto_init_if_empty(dst_info_to_validate, *src->clone());
- const size_t actual_axis = static_cast<size_t>(wrap_around(attributes.axis(), static_cast<int32_t>(src->num_dimensions())));
- const bool needs_permute = actual_axis != 0;
+ const size_t actual_axis =
+ static_cast<size_t>(wrap_around(attributes.axis(), static_cast<int32_t>(src->num_dimensions())));
+ const bool needs_permute = actual_axis != 0;
ARM_COMPUTE_RETURN_ERROR_ON_MSG(needs_permute, "Dynamic fusion softmax on axis!=0 not supported yet.");
// Perform fusion test and check if the operator meets the fusion constraints
@@ -128,17 +133,16 @@ Status GpuSoftmax::validate_op(const GpuWorkloadSketch &sketch,
return is_supported_op(*sketch.gpu_context(), src, &dst_info_to_validate, attributes);
}
-void GpuSoftmax::create_op(GpuWorkloadSketch &sketch,
- ITensorInfo *src,
- ITensorInfo *dst,
- const Attributes &attributes)
+void GpuSoftmax::create_op(GpuWorkloadSketch &sketch, ITensorInfo *src, ITensorInfo *dst, const Attributes &attributes)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src, dst);
ARM_COMPUTE_LOG_PARAMS(src, dst, attributes);
TensorShape logits_sum_shape = src->tensor_shape();
- ITensorInfo *logits = sketch.implementation().create_auxiliary_tensor(src->clone()->set_id(ITensorInfo::invalid_tensor_id).set_tensor_shape(logits_sum_shape));
+ ITensorInfo *logits = sketch.implementation().create_auxiliary_tensor(
+ src->clone()->set_id(ITensorInfo::invalid_tensor_id).set_tensor_shape(logits_sum_shape));
logits_sum_shape.set(0, 1);
- ITensorInfo *sum = sketch.implementation().create_auxiliary_tensor(src->clone()->set_id(ITensorInfo::invalid_tensor_id).set_tensor_shape(logits_sum_shape));
+ ITensorInfo *sum = sketch.implementation().create_auxiliary_tensor(
+ src->clone()->set_id(ITensorInfo::invalid_tensor_id).set_tensor_shape(logits_sum_shape));
// Auto initialize dst tensor info and the auxiliary tensor infos as well
auto_init_if_empty(*dst, *src->clone());
@@ -151,7 +155,7 @@ void GpuSoftmax::create_op(GpuWorkloadSketch &sketch,
auto &comp_graph = sketch.implementation().component_graph();
const auto sketch_ctx = sketch.implementation().context();
- if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
+ if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
{
const auto cl_compile_ctx = sketch_ctx->cl_compile_context();
ARM_COMPUTE_UNUSED(cl_compile_ctx);
@@ -160,7 +164,7 @@ void GpuSoftmax::create_op(GpuWorkloadSketch &sketch,
// Add Direct Conv2d Component
{
auto properties = IGpuKernelComponent::Properties();
- properties.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ properties.stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
ArgumentPack<ITensorInfo> arguments_exp_sum;
ArgumentPack<ITensorInfo> arguments_norm;
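
Two details in the GpuSoftmax hunks above are easy to miss: the auxiliary tensors (logits keeps the source shape, while sum collapses dimension 0 to 1) and the axis normalisation through wrap_around, which maps negative axes into [0, num_dimensions). A self-contained sketch of both, with a hypothetical wrap_axis helper standing in for ACL's wrap_around:

#include <cassert>
#include <cstddef>
#include <cstdint>
#include <vector>

// Hypothetical stand-in for ACL's wrap_around: map axis in [-n, n) onto [0, n).
int32_t wrap_axis(int32_t axis, int32_t num_dims)
{
    return (axis % num_dims + num_dims) % num_dims;
}

int main()
{
    const std::vector<std::size_t> src_shape{10, 4, 2};

    std::vector<std::size_t> logits_shape = src_shape; // logits: same shape as src
    std::vector<std::size_t> sum_shape    = src_shape;
    sum_shape.at(0) = 1;                               // sum: dimension 0 collapsed to 1

    assert(logits_shape == src_shape && sum_shape.at(0) == 1);
    assert(wrap_axis(-1, 3) == 2); // a negative axis wraps to the last dimension
    assert(wrap_axis(0, 3) == 0);  // axis 0 is the only case fused here (needs_permute == false)
    return 0;
}
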
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp
index 8240008f2a..e5d62c9930 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuSub.cpp
@@ -22,6 +22,7 @@
* SOFTWARE.
*/
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuSub.h"
+
#include "arm_compute/dynamic_fusion/sketch/gpu/GpuWorkloadSketch.h"
#include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h"
@@ -32,12 +33,11 @@ namespace experimental
{
namespace dynamic_fusion
{
-Status GpuSub::validate_op(const GpuWorkloadSketch &sketch,
- const ITensorInfo *lhs,
- const ITensorInfo *rhs)
+Status GpuSub::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *lhs, const ITensorInfo *rhs)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8, DataType::S16, DataType::S32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8,
+ DataType::S16, DataType::S32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type");
// Set the elementwise operation to Sub then call the elementwise common validate_op
@@ -46,12 +46,11 @@ Status GpuSub::validate_op(const GpuWorkloadSketch &sketch,
return GpuElementwiseBinaryCommon::validate_op(sketch, lhs, rhs, common_attributes);
}
-Status GpuSub::is_supported_op(const GpuWorkloadContext &context,
- const ITensorInfo *lhs,
- const ITensorInfo *rhs)
+Status GpuSub::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *lhs, const ITensorInfo *rhs)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(lhs, rhs);
- ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8, DataType::S16, DataType::S32);
+ ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(lhs, 1, DataType::F16, DataType::F32, DataType::U8,
+ DataType::S16, DataType::S32);
ARM_COMPUTE_RETURN_ERROR_ON_MSG(lhs->data_type() != rhs->data_type(), "Input tensors must be the same data type");
// Set the elementwise operation to Sub then call the elementwise common is_supported_op
@@ -60,9 +59,7 @@ Status GpuSub::is_supported_op(const GpuWorkloadContext &context,
return GpuElementwiseBinaryCommon::is_supported_op(context, lhs, rhs, common_attributes);
}
-ITensorInfo *GpuSub::create_op(GpuWorkloadSketch &sketch,
- ITensorInfo *lhs,
- ITensorInfo *rhs)
+ITensorInfo *GpuSub::create_op(GpuWorkloadSketch &sketch, ITensorInfo *lhs, ITensorInfo *rhs)
{
// No need to log or validate as they'll be handled inside GpuElementwiseBinaryCommon::create_op()
// Set the elementwise operation to Sub then call the elementwise common create_op
diff --git a/src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp b/src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp
index c00716c76e..bf0f274c5c 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/GpuTanh.cpp
@@ -23,14 +23,15 @@
*/
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuTanh.h"
+
#include "arm_compute/core/experimental/Types.h"
+#include "src/common/utils/Log.h"
+#include "src/core/helpers/AutoConfiguration.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h"
-#include "src/core/helpers/AutoConfiguration.h"
-#include "src/common/utils/Log.h"
namespace arm_compute
{
@@ -40,9 +41,7 @@ namespace dynamic_fusion
{
namespace
{
-Status is_supported_op_helper(const GpuWorkloadContext &context,
- const ITensorInfo *src,
- const ITensorInfo *dst)
+Status is_supported_op_helper(const GpuWorkloadContext &context, const ITensorInfo *src, const ITensorInfo *dst)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src, dst);
ARM_COMPUTE_RETURN_ERROR_ON_DATA_TYPE_CHANNEL_NOT_IN(src, 1, DataType::F16, DataType::F32);
@@ -50,20 +49,21 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
TensorInfo dst_info_to_validate;
const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate;
- if(dst != nullptr)
+ if (dst != nullptr)
{
dst_info_to_validate_ptr = dst;
}
auto_init_if_empty(dst_info_to_validate, *src->clone());
- const ClComponentActivation::Attributes act_info{ ActivationLayerInfo::ActivationFunction::TANH };
+ const ClComponentActivation::Attributes act_info{ActivationLayerInfo::ActivationFunction::TANH};
// Check components
- if(context.gpu_language() == GpuLanguage::OpenCL)
+ if (context.gpu_language() == GpuLanguage::OpenCL)
{
// Validate Activation Component
- const auto properties = IGpuKernelComponent::Properties().stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ const auto properties =
+ IGpuKernelComponent::Properties().stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC, src);
@@ -80,14 +80,12 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
constexpr GpuOperatorType operator_type = GpuOperatorType::Simple;
} // namespace
-Status GpuTanh::is_supported_op(const GpuWorkloadContext &context,
- const ITensorInfo *src)
+Status GpuTanh::is_supported_op(const GpuWorkloadContext &context, const ITensorInfo *src)
{
return is_supported_op_helper(context, src, nullptr);
}
-Status GpuTanh::validate_op(const GpuWorkloadSketch &sketch,
- const ITensorInfo *src)
+Status GpuTanh::validate_op(const GpuWorkloadSketch &sketch, const ITensorInfo *src)
{
ARM_COMPUTE_RETURN_ERROR_ON_NULLPTR(src);
@@ -112,8 +110,7 @@ Status GpuTanh::validate_op(const GpuWorkloadSketch &sketch,
return is_supported_op_helper(*sketch.gpu_context(), src, &dst_info_to_validate);
}
-ITensorInfo *GpuTanh::create_op(GpuWorkloadSketch &sketch,
- ITensorInfo *src)
+ITensorInfo *GpuTanh::create_op(GpuWorkloadSketch &sketch, ITensorInfo *src)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(src);
ARM_COMPUTE_LOG_PARAMS(src);
@@ -128,15 +125,15 @@ ITensorInfo *GpuTanh::create_op(GpuWorkloadSketch &sketch,
// Translate into components and add to component graph
GpuKernelComponentGraph &comp_graph = sketch.implementation().component_graph();
- const ClComponentActivation::Attributes act_info{ ActivationLayerInfo::ActivationFunction::TANH };
+ const ClComponentActivation::Attributes act_info{ActivationLayerInfo::ActivationFunction::TANH};
const auto *const sketch_ctx = sketch.implementation().context();
- if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
+ if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
{
// Add Activation Component
auto properties = IGpuKernelComponent::Properties();
- properties.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ properties.stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC, src);
diff --git a/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp b/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp
index 7c087c9a7b..d79a4c42c9 100644
--- a/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp
+++ b/src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.cpp
@@ -22,11 +22,12 @@
* SOFTWARE.
*/
#include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h"
+
#include "src/common/utils/Log.h"
#include "src/core/helpers/AutoConfiguration.h"
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuWorkloadSketchImpl.h"
namespace arm_compute
{
@@ -38,9 +39,10 @@ namespace
{
void calculate_and_init_dst_if_empty(ITensorInfo *dst, const ITensorInfo *lhs, const ITensorInfo *rhs)
{
- if(dst->total_size() == 0U)
+ if (dst->total_size() == 0U)
{
- const std::pair<TensorShape, ValidRegion> broadcast_pair = ITensorInfo::broadcast_shape_and_valid_region(*lhs, *rhs);
+ const std::pair<TensorShape, ValidRegion> broadcast_pair =
+ ITensorInfo::broadcast_shape_and_valid_region(*lhs, *rhs);
auto_init_if_empty(*dst, lhs->clone()->set_tensor_shape(broadcast_pair.first));
}
}
@@ -56,7 +58,7 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
TensorInfo dst_info_to_validate;
const ITensorInfo *dst_info_to_validate_ptr = &dst_info_to_validate;
- if(dst != nullptr)
+ if (dst != nullptr)
{
dst_info_to_validate_ptr = dst;
}
@@ -64,7 +66,7 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
calculate_and_init_dst_if_empty(&dst_info_to_validate, lhs, rhs);
// Check components
- if(context.gpu_language() == GpuLanguage::OpenCL)
+ if (context.gpu_language() == GpuLanguage::OpenCL)
{
const auto cl_compile_ctx = context.cl_compile_context();
ARM_COMPUTE_RETURN_ERROR_ON(cl_compile_ctx == nullptr);
@@ -90,7 +92,8 @@ Status is_supported_op_helper(const GpuWorkloadContext &context,
GpuOperatorType operator_type = GpuOperatorType::Simple;
} // namespace
-ElementwiseBinaryCommonAttributes &ElementwiseBinaryCommonAttributes::operation(const ElementwiseBinaryCommonAttributes::ElementwiseOp &operation)
+ElementwiseBinaryCommonAttributes &
+ElementwiseBinaryCommonAttributes::operation(const ElementwiseBinaryCommonAttributes::ElementwiseOp &operation)
{
_operation = operation;
return *this;
@@ -157,14 +160,14 @@ ITensorInfo *GpuElementwiseBinaryCommon::create_op(GpuWorkloadSketch
const auto sketch_ctx = sketch.implementation().context();
- if(sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
+ if (sketch_ctx->gpu_language() == GpuLanguage::OpenCL)
{
ARM_COMPUTE_ERROR_ON_NULLPTR(sketch_ctx->cl_compile_context());
// Add ElementwiseBinary Component
{
auto properties = IGpuKernelComponent::Properties();
- properties.stage(UnitWorkloadStage{ UnitWorkloadStage::Stage::Run });
+ properties.stage(UnitWorkloadStage{UnitWorkloadStage::Stage::Run});
ArgumentPack<ITensorInfo> arguments;
arguments.add_const_tensor(ACL_SRC_0, lhs);
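
The destination of the common elementwise path is derived from broadcast_shape_and_valid_region(lhs, rhs). A minimal stand-in for the broadcast-shape half, assuming the usual rule that per dimension the sizes must match or one of them must be 1, with missing upper dimensions counting as 1 (a simplification of ACL's actual helper):

#include <algorithm>
#include <cassert>
#include <cstddef>
#include <utility>
#include <vector>

std::vector<std::size_t> broadcast_shape(std::vector<std::size_t> a, std::vector<std::size_t> b)
{
    if (a.size() < b.size())
        std::swap(a, b);
    b.resize(a.size(), 1); // missing upper dimensions count as size 1
    std::vector<std::size_t> out(a.size());
    for (std::size_t i = 0; i < a.size(); ++i)
    {
        assert(a[i] == b[i] || a[i] == 1 || b[i] == 1); // otherwise not broadcastable
        out[i] = std::max(a[i], b[i]);
    }
    return out;
}

int main()
{
    const auto out = broadcast_shape({8, 1, 3}, {8, 4, 1});
    return (out == std::vector<std::size_t>{8, 4, 3}) ? 0 : 1;
}
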
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp
index 0972b4e8e2..775b0a0c8c 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.cpp
@@ -22,8 +22,10 @@
* SOFTWARE.
*/
#include "GpuKernelVariableTable.h"
+
#include "arm_compute/core/CL/CLHelpers.h"
#include "arm_compute/core/ITensorInfo.h"
+
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
namespace arm_compute
@@ -32,14 +34,17 @@ namespace experimental
{
namespace dynamic_fusion
{
-void GpuKernelVariableTable::declare_variable(const GpuKernelComponentGroup &comp_group, const ITensorInfo *tensor, GpuKernelArgumentInfo argument_info, const std::string &alias)
+void GpuKernelVariableTable::declare_variable(const GpuKernelComponentGroup &comp_group,
+ const ITensorInfo *tensor,
+ GpuKernelArgumentInfo argument_info,
+ const std::string &alias)
{
ARM_COMPUTE_ERROR_ON_MSG(!tensor->has_valid_id(), "Tensor info with valid id expected");
// Do not re-declare if the variable associated with the tensor has already been declared
auto it = _vars.find(tensor->id());
- if(it != _vars.end())
+ if (it != _vars.end())
{
ARM_COMPUTE_ERROR_ON(!(it->second.kernel_argument_info == argument_info));
return;
@@ -47,14 +52,12 @@ void GpuKernelVariableTable::declare_variable(const GpuKernelComponentGroup &com
const auto target = comp_group.get_tile_for_tensor(tensor);
- if(target != tensor)
+ if (target != tensor)
{
// If the tensor uses a shared tile, don't declare another variable.
it = _vars.find(target->id());
- ARM_COMPUTE_ERROR_ON_MSG(
- it == _vars.end(),
- "The variable used for this tensor must have been declared.");
+ ARM_COMPUTE_ERROR_ON_MSG(it == _vars.end(), "The variable used for this tensor must have been declared.");
_vars[tensor->id()] = it->second;
}
@@ -64,7 +67,7 @@ void GpuKernelVariableTable::declare_variable(const GpuKernelComponentGroup &com
std::stringstream ss;
ss << alias << "_t" << abs(tensor->id());
const auto uniq_name = ss.str();
- TensorVariable var{ tensor->id(), uniq_name, argument_info };
+ TensorVariable var{tensor->id(), uniq_name, argument_info};
_vars.emplace(tensor->id(), var);
}
@@ -76,12 +79,13 @@ GpuKernelVariableTable::TensorVariable GpuKernelVariableTable::get_variable(cons
return var;
}
-GpuKernelVariableTable::VariableList GpuKernelVariableTable::get_variable_list(const std::vector<const ITensorInfo *> &tensors) const
+GpuKernelVariableTable::VariableList
+GpuKernelVariableTable::get_variable_list(const std::vector<const ITensorInfo *> &tensors) const
{
VariableList vars{};
- for(const auto &tensor : tensors)
+ for (const auto &tensor : tensors)
{
- if(!tensor->has_valid_id())
+ if (!tensor->has_valid_id())
{
continue;
}
@@ -90,23 +94,19 @@ GpuKernelVariableTable::VariableList GpuKernelVariableTable::get_variable_list(c
return vars;
}
-TagVal::TagVal(const GpuKernelVariableTable::TensorVariable &var)
- : value{ var.uniq_name }
+TagVal::TagVal(const GpuKernelVariableTable::TensorVariable &var) : value{var.uniq_name}
{
}
-TagVal::TagVal(const std::string &val)
- : value{ val }
+TagVal::TagVal(const std::string &val) : value{val}
{
}
-TagVal::TagVal(const char *val)
- : value{ std::string(val) }
+TagVal::TagVal(const char *val) : value{std::string(val)}
{
}
-TagVal::TagVal(const DataType &data_type)
- : value{ get_cl_type_from_data_type(data_type) }
+TagVal::TagVal(const DataType &data_type) : value{get_cl_type_from_data_type(data_type)}
{
}
} // namespace dynamic_fusion
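
declare_variable above derives the final kernel variable name as alias + "_t" + abs(tensor id) through a stringstream. A tiny standalone equivalent:

#include <cstdlib>
#include <sstream>
#include <string>

std::string make_uniq_name(const std::string &alias, int tensor_id)
{
    std::stringstream ss;
    ss << alias << "_t" << std::abs(tensor_id); // e.g. ("src", -3) -> "src_t3"
    return ss.str();
}

int main()
{
    return make_uniq_name("src", -3) == "src_t3" ? 0 : 1;
}
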
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h b/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h
index a49d38e10c..c17f131ada 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h
@@ -25,6 +25,7 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_GPUKERNELVARIABLETABLE
#include "arm_compute/core/ITensorInfo.h"
+
#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
#include "support/AclRequires.h"
#include "support/StringSupport.h"
@@ -55,11 +56,11 @@ public:
struct TensorVariable
{
public:
- TensorVariable() = default;
- TensorVariable(const TensorVariable &) = default;
+ TensorVariable() = default;
+ TensorVariable(const TensorVariable &) = default;
TensorVariable &operator=(const TensorVariable &) = default;
- ITensorInfo::Id id{ ITensorInfo::invalid_tensor_id };
- std::string uniq_name{ "empty" }; // Unique name, also the final variable name used in the built code
+ ITensorInfo::Id id{ITensorInfo::invalid_tensor_id};
+ std::string uniq_name{"empty"}; // Unique name, also the final variable name used in the built code
GpuKernelArgumentInfo kernel_argument_info{};
bool has_valid_id() const
{
@@ -76,7 +77,10 @@ public:
* @param[in] argument_info Kernel argument information
* @param[in] alias Alias for the variable. Will be used as part of the variable name
*/
- void declare_variable(const GpuKernelComponentGroup &comp_group, const ITensorInfo *tensor, GpuKernelArgumentInfo argument_info, const std::string &alias = "unnamed");
+ void declare_variable(const GpuKernelComponentGroup &comp_group,
+ const ITensorInfo *tensor,
+ GpuKernelArgumentInfo argument_info,
+ const std::string &alias = "unnamed");
/** Get the @ref TensorVariable associated with @p tensor
*
* @param[in] tensor Tensor info to be queried
@@ -106,8 +110,7 @@ struct TagVal
TagVal(const GpuKernelVariableTable::TensorVariable &var);
/** Construct a @ref TagVal from an integral type */
template <typename T, ARM_COMPUTE_REQUIRES_TA(std::is_integral<T>::value)>
- TagVal(T val)
- : value{ support::cpp11::to_string(val) }
+ TagVal(T val) : value{support::cpp11::to_string(val)}
{
}
/** Construct a @ref TagVal from a string */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h b/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h
index 4a1fb142d6..9d0b4f592a 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h
@@ -27,6 +27,7 @@
#include "arm_compute/core/CL/CLCompileContext.h"
#include "arm_compute/core/ITensorInfo.h"
#include "arm_compute/core/Window.h"
+
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
#include "src/dynamic_fusion/sketch/gpu/components/Types.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
@@ -57,8 +58,7 @@ public:
* @param[in] id Component id
* @param[in] tensors Tensor arguments to the components
*/
- IGpuTemplateComponentWriter(ComponentId id, const ArgumentPack<ITensorInfo> &tensors)
- : _id{ id }, _tensors{ tensors }
+ IGpuTemplateComponentWriter(ComponentId id, const ArgumentPack<ITensorInfo> &tensors) : _id{id}, _tensors{tensors}
{
}
/** Destructor */
@@ -112,7 +112,7 @@ public:
/** Generate the header list used in the component */
virtual std::set<std::string> get_headers_list() const
{
- return std::set<std::string> {};
+ return std::set<std::string>{};
}
/** Generate the execution window for the component */
virtual Window get_window() const
@@ -131,7 +131,7 @@ public:
}
private:
- ComponentId _id{ -1 };
+ ComponentId _id{-1};
ArgumentPack<ITensorInfo> _tensors{};
};
} // namespace dynamic_fusion
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp
index 3c7c843dd8..c165fb5f33 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.cpp
@@ -26,6 +26,7 @@
#include "arm_compute/core/utils/ActivationFunctionUtils.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/helpers/WindowHelpers.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "support/StringSupport.h"
@@ -39,10 +40,7 @@ namespace dynamic_fusion
ClTemplateActivation::ClTemplateActivation(ComponentId id,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes)
- : IGpuTemplateComponentWriter{ id, tensors },
- _src{},
- _dst{},
- _attributes{ attributes }
+ : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}, _attributes{attributes}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC);
_dst = this->tensors().get_const_tensor(TensorType::ACL_DST);
@@ -62,7 +60,7 @@ std::string ClTemplateActivation::get_component_code(const ComponentGroup &comp_
code = R"_(
//------------------ START KERNEL {{meta_kernel_id}} ---------------------
)_";
- if(is_root)
+ if (is_root)
{
code += R"_(
// IN(src) {{src}}
@@ -104,17 +102,11 @@ LOOP_UNROLLING(int, i, 0, 1, M0,
void ClTemplateActivation::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
{
- vtable.declare_variable(
- comp_group,
- _src,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "src");
-
- vtable.declare_variable(
- comp_group,
- _dst,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "dst");
+ vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "src");
+
+ vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "dst");
}
TagLUT ClTemplateActivation::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
@@ -173,7 +165,7 @@ std::string ClTemplateActivation::get_config_id() const
std::set<std::string> ClTemplateActivation::get_headers_list() const
{
- return std::set<std::string>{ "helpers.h", "tile_helpers.h", "activation_float_helpers.h" };
+ return std::set<std::string>{"helpers.h", "tile_helpers.h", "activation_float_helpers.h"};
}
Window ClTemplateActivation::get_window() const
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h
index ec78cf6ce5..88ee370342 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateActivation.h
@@ -26,6 +26,7 @@
#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/function_info/ActivationLayerInfo.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentActivation.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp
index 4956879ad3..0da3a73801 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateCast.cpp
@@ -25,6 +25,7 @@
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/helpers/WindowHelpers.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
@@ -35,7 +36,7 @@ namespace experimental
namespace dynamic_fusion
{
ClTemplateCast::ClTemplateCast(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes)
- : IGpuTemplateComponentWriter{ id, tensors }, _src{}, _dst{}, _attributes{ attributes }
+ : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}, _attributes{attributes}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
@@ -62,7 +63,7 @@ std::string ClTemplateCast::get_component_code(const ComponentGroup &comp_group)
//------------------ START KERNEL {{meta_kernel_id}} CAST ---------------------
)_";
- if(is_root)
+ if (is_root)
{
code += R"_(
// IN_0(src) {{src}}
@@ -82,14 +83,15 @@ TILE(uint, M0, 1, g_dst_indirect_y);
{
)_";
- if(kernel_name == "cast_down" && is_data_type_quantized(_src->data_type()))
+ if (kernel_name == "cast_down" && is_data_type_quantized(_src->data_type()))
{
code += R"_(
{{tmp}}[m0].v ^= (VEC_DATA_TYPE({{DATA_TYPE_IN}}, N0))0x80;
)_";
}
- if(kernel_name == "cast_down" && (is_data_type_float(_src->data_type()) || _attributes.convert_policy() == ConvertPolicy::SATURATE))
+ if (kernel_name == "cast_down" &&
+ (is_data_type_float(_src->data_type()) || _attributes.convert_policy() == ConvertPolicy::SATURATE))
{
code += R"_(
{{dst}}[m0].v = CONVERT_SAT({{tmp}}[m0].v, VEC_DATA_TYPE({{DATA_TYPE_OUT}}, N0));
@@ -106,7 +108,7 @@ TILE(uint, M0, 1, g_dst_indirect_y);
})
)_";
- if(is_root)
+ if (is_root)
{
code += R"_(
LOOP_UNROLLING(int, i, 0, 1, M0,
@@ -128,17 +130,11 @@ TILE(uint, M0, 1, g_dst_indirect_y);
void ClTemplateCast::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
{
- vtable.declare_variable(
- comp_group,
- _src,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "src");
-
- vtable.declare_variable(
- comp_group,
- _dst,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "dst");
+ vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "src");
+
+ vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "dst");
}
TagLUT ClTemplateCast::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
@@ -199,7 +195,7 @@ std::string ClTemplateCast::get_config_id() const
std::set<std::string> ClTemplateCast::get_headers_list() const
{
- return std::set<std::string>{ "helpers.h", "tile_helpers.h" };
+ return std::set<std::string>{"helpers.h", "tile_helpers.h"};
}
Window ClTemplateCast::get_window() const
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp
index ab7cc9f05a..8380620ab2 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.cpp
@@ -36,17 +36,17 @@ ClTemplateDepthwiseConv2d::ClTemplateDepthwiseConv2d(ComponentId
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes,
const Settings &settings)
- : IGpuTemplateComponentWriter{ id, tensors },
+ : IGpuTemplateComponentWriter{id, tensors},
_src{},
_weight{},
_bias{},
_dst{},
- _attributes{ attributes },
- _settings{ settings }
+ _attributes{attributes},
+ _settings{settings}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_weight = this->tensors().get_const_tensor(TensorType::ACL_SRC_1);
- if(this->tensors().get_const_tensor(TensorType::ACL_SRC_2))
+ if (this->tensors().get_const_tensor(TensorType::ACL_SRC_2))
{
_bias = this->tensors().get_const_tensor(TensorType::ACL_SRC_2);
}
@@ -71,7 +71,7 @@ std::string ClTemplateDepthwiseConv2d::get_component_code(const ComponentGroup &
// IN_1(wei) {{weight}}
)_";
- if(_bias != nullptr && _bias->has_valid_id())
+ if (_bias != nullptr && _bias->has_valid_id())
{
code += R"_(
// IN_1(bia) {{bias}}
@@ -113,7 +113,7 @@ TILE(uint, M0, 1, g_dst_indirect_y);
})
)_";
- if(_weight->dimension(height_idx) < 5)
+ if (_weight->dimension(height_idx) < 5)
{
code += R"_(
LOOP_UNROLLING(int, yk, 0, 1, _IWEI_HEIGHT,
@@ -147,7 +147,7 @@ TILE(uint, M0, 1, g_dst_indirect_y);
{
)_";
- if(!_settings.is_fma_available())
+ if (!_settings.is_fma_available())
{
code += R"_(
{{dst}}[m0].v += a[xk + m0].v * b[xk].v;
@@ -166,14 +166,14 @@ TILE(uint, M0, 1, g_dst_indirect_y);
}
)_";
- if(_weight->dimension(height_idx) < 5)
+ if (_weight->dimension(height_idx) < 5)
{
code += R"_(
)
)_";
}
- if(_bias && _bias->has_valid_id())
+ if (_bias && _bias->has_valid_id())
{
code += R"_(
TILE({{BIA_DATA_TYPE}}, 1, N0, {{bias}});
@@ -198,44 +198,31 @@ TILE(uint, M0, 1, g_dst_indirect_y);
return code;
}
-void ClTemplateDepthwiseConv2d::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+void ClTemplateDepthwiseConv2d::declare_variables(GpuKernelVariableTable &vtable,
+ const ComponentGroup &comp_group) const
{
- const GpuKernelArgumentInfo::Type input_type = _settings.export_input_to_cl_image() ?
- GpuKernelArgumentInfo::Type::Tensor_4D_t_Image :
- GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer;
-
- vtable.declare_variable(
- comp_group,
- _src,
- GpuKernelArgumentInfo(input_type),
- "src");
-
- const GpuKernelArgumentInfo::Type weight_type = _settings.export_weights_to_cl_image() ?
- GpuKernelArgumentInfo::Type::Tensor_4D_t_Image :
- GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer;
-
- vtable.declare_variable(
- comp_group,
- _weight,
- GpuKernelArgumentInfo(weight_type),
- "weight");
-
- if(_bias != nullptr && _bias->has_valid_id()) // optional bias
+ const GpuKernelArgumentInfo::Type input_type = _settings.export_input_to_cl_image()
+ ? GpuKernelArgumentInfo::Type::Tensor_4D_t_Image
+ : GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer;
+
+ vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(input_type), "src");
+
+ const GpuKernelArgumentInfo::Type weight_type = _settings.export_weights_to_cl_image()
+ ? GpuKernelArgumentInfo::Type::Tensor_4D_t_Image
+ : GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer;
+
+ vtable.declare_variable(comp_group, _weight, GpuKernelArgumentInfo(weight_type), "weight");
+
+ if (_bias != nullptr && _bias->has_valid_id()) // optional bias
{
- vtable.declare_variable(
- comp_group,
- _bias,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Vector),
- "bias");
+ vtable.declare_variable(comp_group, _bias, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Vector), "bias");
}
- vtable.declare_variable(
- comp_group,
- _dst,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "dst");
+ vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "dst");
}
-TagLUT ClTemplateDepthwiseConv2d::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+TagLUT ClTemplateDepthwiseConv2d::get_tag_lut(const GpuKernelVariableTable &vtable,
+ const ComponentGroup &comp_group) const
{
TagLUT lut{};
@@ -243,7 +230,7 @@ TagLUT ClTemplateDepthwiseConv2d::get_tag_lut(const GpuKernelVariableTable &vtab
lut["src"] = vtable.get_variable(_src);
lut["weight"] = vtable.get_variable(_weight);
- if(_bias != nullptr && _bias->has_valid_id()) // optional bias
+ if (_bias != nullptr && _bias->has_valid_id()) // optional bias
{
lut["bias"] = vtable.get_variable(_bias);
lut["BIA_DATA_TYPE"] = get_cl_type_from_data_type(_bias->data_type());
@@ -259,7 +246,7 @@ TagLUT ClTemplateDepthwiseConv2d::get_tag_lut(const GpuKernelVariableTable &vtab
lut["SRC_DATA_TYPE"] = _src->data_type();
lut["WEI_DATA_TYPE"] = _weight->data_type();
- switch(vtable.get_variable(_src).kernel_argument_info.type)
+ switch (vtable.get_variable(_src).kernel_argument_info.type)
{
case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D:
case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:
@@ -271,7 +258,7 @@ TagLUT ClTemplateDepthwiseConv2d::get_tag_lut(const GpuKernelVariableTable &vtab
break;
}
- switch(vtable.get_variable(_weight).kernel_argument_info.type)
+ switch (vtable.get_variable(_weight).kernel_argument_info.type)
{
case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D:
case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:
@@ -318,7 +305,7 @@ CLBuildOptions ClTemplateDepthwiseConv2d::get_build_options(const ComponentGroup
CLBuildOptions build_opts{};
- if(_settings.fast_relaxed_math())
+ if (_settings.fast_relaxed_math())
{
build_opts.add_option("-cl-fast-relaxed-math");
}
@@ -361,7 +348,7 @@ std::string ClTemplateDepthwiseConv2d::get_config_id() const
std::set<std::string> ClTemplateDepthwiseConv2d::get_headers_list() const
{
- return std::set<std::string>{ "helpers.h", "tile_helpers.h" };
+ return std::set<std::string>{"helpers.h", "tile_helpers.h"};
}
Window ClTemplateDepthwiseConv2d::get_window() const
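The rewrapped conditionals above (input_type, weight_type) put the ? and : of a multi-line ternary at the start of each continuation line, aligned under the first operand. A short self-contained sketch of that wrapping; BreakBeforeTernaryOperators is an assumed option name inferred from the diff, not a value read from the actual configuration:

    // Assumed option: BreakBeforeTernaryOperators: true
    enum class ArgType
    {
        Tensor4DBuffer,
        Tensor4DImage
    };

    int main()
    {
        const bool export_to_cl_image = true;
        // '?' and ':' lead their lines, aligned under the condition:
        const ArgType input_type = export_to_cl_image
                                       ? ArgType::Tensor4DImage
                                       : ArgType::Tensor4DBuffer;
        return input_type == ArgType::Tensor4DImage ? 0 : 1;
    }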
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h
index 84b689ef64..5d04c687c3 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDepthwiseConv2d.h
@@ -25,6 +25,7 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEDEPTHWISECONV2D
#include "arm_compute/dynamic_fusion/sketch/attributes/DepthwiseConv2dAttributes.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDepthwiseConv2d.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
index 3322487910..f6a7a58d1d 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.cpp
@@ -23,14 +23,13 @@
*/
#include "ClTemplateDirectConv2d.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
-
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/StringUtils.h"
-#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "support/StringSupport.h"
namespace arm_compute
@@ -43,17 +42,17 @@ ClTemplateDirectConv2d::ClTemplateDirectConv2d(ComponentId
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes,
const Settings &settings)
- : IGpuTemplateComponentWriter{ id, tensors },
+ : IGpuTemplateComponentWriter{id, tensors},
_src{},
_weight{},
_bias{},
_dst{},
- _attributes{ attributes },
- _settings{ settings }
+ _attributes{attributes},
+ _settings{settings}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_weight = this->tensors().get_const_tensor(TensorType::ACL_SRC_1);
- if(this->tensors().get_const_tensor(TensorType::ACL_SRC_2))
+ if (this->tensors().get_const_tensor(TensorType::ACL_SRC_2))
{
_bias = this->tensors().get_const_tensor(TensorType::ACL_SRC_2);
}
@@ -79,7 +78,7 @@ std::string ClTemplateDirectConv2d::get_component_code(const ComponentGroup &com
// IN_0(src) {{src}}
// IN_1(wei) {{weight}}
)_";
- if(_bias && _bias->has_valid_id())
+ if (_bias && _bias->has_valid_id())
{
code += R"_(
// IN_1(bia) {{bias}}
@@ -161,7 +160,7 @@ TILE(uint, M0, 1, g_dst_indirect_y);
}
)_";
- if(leftover_loop)
+ if (leftover_loop)
{
code += R"_(
for(; ck < _ISRC_CHANNELS; ++ck)
@@ -186,9 +185,9 @@ TILE(uint, M0, 1, g_dst_indirect_y);
T_MMUL({{SRC_DATA_TYPE}}, {{WEI_DATA_TYPE}}, {{ACC_DATA_TYPE}}, M0, N0, 1, NT, T, a, b, {{dst}});
}
)_";
-}
+ }
-code += R"_(
+ code += R"_(
#undef _I_WEI_WIDTH
#undef _I_WEI_HEIGHT
#undef _ISRC_WIDTH
@@ -202,7 +201,7 @@ code += R"_(
}
)_";
- if(_bias && _bias->has_valid_id())
+ if (_bias && _bias->has_valid_id())
{
code += R"_(
TILE({{BIA_DATA_TYPE}}, 1, N0, bias0);
@@ -211,9 +210,9 @@ code += R"_(
T_ELTWISE_BROADCAST_ADD_X({{ACC_DATA_TYPE}}, M0, N0, {{dst}}, bias0, {{dst}});
)_";
-}
+ }
-code += R"_(
+ code += R"_(
LOOP_UNROLLING(int, i, 0, 1, M0,
{
g_dst_indirect_y[i].v = (uint)min(g_ind_1 + i, (int)({{DST_WIDTH}} * {{DST_HEIGHT}}) - 1);
@@ -227,32 +226,19 @@ code += R"_(
void ClTemplateDirectConv2d::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
{
- vtable.declare_variable(
- comp_group,
- _src,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "src");
-
- const GpuKernelArgumentInfo::Type weight_type = _settings.export_to_cl_image() ? GpuKernelArgumentInfo::Type::Tensor_4D_t_Image : GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer;
- vtable.declare_variable(
- comp_group,
- _weight,
- GpuKernelArgumentInfo(weight_type),
- "weight");
-
- if(_bias && _bias->has_valid_id()) // optional bias
+ vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "src");
+
+ const GpuKernelArgumentInfo::Type weight_type = _settings.export_to_cl_image()
+ ? GpuKernelArgumentInfo::Type::Tensor_4D_t_Image
+ : GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer;
+ vtable.declare_variable(comp_group, _weight, GpuKernelArgumentInfo(weight_type), "weight");
+
+ if (_bias && _bias->has_valid_id()) // optional bias
{
- vtable.declare_variable(
- comp_group,
- _bias,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Vector),
- "bias");
+ vtable.declare_variable(comp_group, _bias, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Vector), "bias");
}
- vtable.declare_variable(
- comp_group,
- _dst,
- GpuKernelArgumentInfo(common_tensor_type),
- "dst");
+ vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(common_tensor_type), "dst");
}
TagLUT ClTemplateDirectConv2d::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
@@ -262,7 +248,7 @@ TagLUT ClTemplateDirectConv2d::get_tag_lut(const GpuKernelVariableTable &vtable,
lut["src"] = vtable.get_variable(_src);
lut["weight"] = vtable.get_variable(_weight);
- if(_bias && _bias->has_valid_id()) // optional bias
+ if (_bias && _bias->has_valid_id()) // optional bias
{
lut["bias"] = vtable.get_variable(_bias);
lut["BIA_DATA_TYPE"] = get_cl_type_from_data_type(_bias->data_type());
@@ -279,34 +265,34 @@ TagLUT ClTemplateDirectConv2d::get_tag_lut(const GpuKernelVariableTable &vtable,
lut["WEI_DATA_TYPE"] = _weight->data_type();
lut["SRC_TENSOR_TYPE"] = "BUFFER";
- switch(vtable.get_variable(_weight).kernel_argument_info.type)
+ switch (vtable.get_variable(_weight).kernel_argument_info.type)
{
case GpuKernelArgumentInfo::Type::Image_Export_To_ClImage2D:
case GpuKernelArgumentInfo::Type::Image_3D_Export_To_ClImage2D:
case GpuKernelArgumentInfo::Type::Tensor_4D_t_Image:
- {
- lut["WEI_TENSOR_TYPE"] = "IMAGE";
- break;
- }
+ {
+ lut["WEI_TENSOR_TYPE"] = "IMAGE";
+ break;
+ }
default:
- {
- lut["WEI_TENSOR_TYPE"] = "BUFFER";
- break;
- }
+ {
+ lut["WEI_TENSOR_TYPE"] = "BUFFER";
+ break;
+ }
}
- const auto width_idx = 1;
- const auto height_idx = 2;
+ const auto width_idx = 1;
+ const auto height_idx = 2;
const auto channel_idx = 0;
- lut["SRC_WIDTH"] = _src->dimension(width_idx);
- lut["SRC_HEIGHT"] = _src->dimension(height_idx);
+ lut["SRC_WIDTH"] = _src->dimension(width_idx);
+ lut["SRC_HEIGHT"] = _src->dimension(height_idx);
lut["SRC_CHANNELS"] = _src->dimension(channel_idx);
- lut["WEI_WIDTH"] = _weight->dimension(width_idx);
- lut["WEI_HEIGHT"] = _weight->dimension(height_idx);
+ lut["WEI_WIDTH"] = _weight->dimension(width_idx);
+ lut["WEI_HEIGHT"] = _weight->dimension(height_idx);
- lut["DST_WIDTH"] = _dst->dimension(width_idx);
- lut["DST_HEIGHT"] = _dst->dimension(height_idx);
+ lut["DST_WIDTH"] = _dst->dimension(width_idx);
+ lut["DST_HEIGHT"] = _dst->dimension(height_idx);
lut["DST_CHANNELS"] = _dst->dimension(channel_idx);
lut["STRIDE_X"] = _attributes.stride().x();
@@ -324,14 +310,14 @@ CLBuildOptions ClTemplateDirectConv2d::get_build_options(const ComponentGroup &c
{
const unsigned int channel_idx = get_data_layout_dimension_index(_src->data_layout(), DataLayoutDimension::CHANNEL);
- const auto root_window = comp_group.get_root_component()->template_writer()->get_window();
- const unsigned int n0 = root_window.x().step();
- const unsigned int m0 = root_window.y().step();
- const unsigned int k0 = adjust_vec_size(_settings.direct_conv_descriptor().k0, _src->dimension(channel_idx));
+ const auto root_window = comp_group.get_root_component()->template_writer()->get_window();
+ const unsigned int n0 = root_window.x().step();
+ const unsigned int m0 = root_window.y().step();
+ const unsigned int k0 = adjust_vec_size(_settings.direct_conv_descriptor().k0, _src->dimension(channel_idx));
const unsigned int partial_store_n0 = _dst->dimension(0) % n0;
CLBuildOptions build_opts{};
- if(_settings.fast_relaxed_math())
+ if (_settings.fast_relaxed_math())
{
build_opts.add_option("-cl-fast-relaxed-math");
}
@@ -379,7 +365,7 @@ std::string ClTemplateDirectConv2d::get_config_id() const
std::set<std::string> ClTemplateDirectConv2d::get_headers_list() const
{
- return std::set<std::string>{ "helpers.h", "tile_helpers.h" };
+ return std::set<std::string>{"helpers.h", "tile_helpers.h"};
}
Window ClTemplateDirectConv2d::get_window() const
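The get_tag_lut() and get_build_options() hunks above also column-align runs of consecutive assignments and declarations, padding around '=' so the values line up. A compilable sketch of that effect; AlignConsecutiveAssignments and AlignConsecutiveDeclarations are inferred names for whatever the revised configuration actually enables:

    #include <map>
    #include <string>

    int main()
    {
        // Assumed: AlignConsecutiveAssignments pads '=' into a column.
        const unsigned int n0         = 4;
        const unsigned int m0         = 2;
        const unsigned int partial_n0 = 16 % n0;

        // The same alignment applied to a tag lookup table, as in the diff:
        std::map<std::string, unsigned int> lut;
        lut["SRC_WIDTH"]  = 16;
        lut["SRC_HEIGHT"] = 8;
        lut["DST_WIDTH"]  = partial_n0 + m0;
        return static_cast<int>(lut["DST_WIDTH"]);
    }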
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h
index 8988d3ca1c..03c8cd2f15 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateDirectConv2d.h
@@ -26,6 +26,7 @@
#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/dynamic_fusion/sketch/attributes/Conv2dAttributes.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp
index c0481ae190..78bff3c3f3 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.cpp
@@ -23,14 +23,13 @@
*/
#include "ClTemplateElementwiseBinary.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h"
-
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/StringUtils.h"
-#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "support/StringSupport.h"
namespace arm_compute
@@ -44,11 +43,7 @@ constexpr unsigned int vector_size_byte_opencl = 16;
ClTemplateElementwiseBinary::ClTemplateElementwiseBinary(ComponentId id,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes)
- : IGpuTemplateComponentWriter{ id, tensors },
- _lhs{},
- _rhs{},
- _dst{},
- _attributes{ attributes }
+ : IGpuTemplateComponentWriter{id, tensors}, _lhs{}, _rhs{}, _dst{}, _attributes{attributes}
{
_lhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_rhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_1);
@@ -69,67 +64,67 @@ std::string ClTemplateElementwiseBinary::get_component_code(const ComponentGroup
const bool is_rhs_input = comp_group.is_input_tensor(_rhs);
code =
-R"_(
+ R"_(
//------------------ START KERNEL {{meta_kernel_id}} {{ELTWISE_OP}} ---------------------
)_";
- if(is_root)
+ if (is_root)
{
code +=
-R"_(
+ R"_(
TILE(uint, M0, 1, g_dst_indirect_y);
)_";
}
- if(is_lhs_input)
+ if (is_lhs_input)
{
code +=
-R"_(
+ R"_(
TILE({{DATA_TYPE}}, {{lhs_m0}}, N0, {{lhs}});
)_";
}
- if(is_rhs_input)
+ if (is_rhs_input)
{
code +=
-R"_(
+ R"_(
TILE({{DATA_TYPE}}, {{rhs_m0}}, N0, {{rhs}});
)_";
}
code +=
-R"_(
+ R"_(
{
)_";
- if(is_lhs_input)
+ if (is_lhs_input)
{
code +=
-R"_(
+ R"_(
{{lhs}}_offset_first_element_in_bytes += g_ind_2 * {{lhs}}_stride_w;
T_LOAD({{DATA_TYPE}}, {{lhs_m0}}, {{lhs_n0}}, BUFFER, {{lhs}}, {{lhs_start_ind_0}}, {{lhs_start_ind_1}}, 1, {{lhs}}_stride_y, {{lhs}});
)_";
}
- if(is_rhs_input)
+ if (is_rhs_input)
{
code +=
-R"_(
+ R"_(
{{rhs}}_offset_first_element_in_bytes += g_ind_2 * {{rhs}}_stride_w;
T_LOAD({{DATA_TYPE}}, {{rhs_m0}}, {{rhs_n0}}, BUFFER, {{rhs}}, {{rhs_start_ind_0}}, {{rhs_start_ind_1}}, 1, {{rhs}}_stride_y, {{rhs}});
)_";
}
code +=
-R"_(
+ R"_(
T_ELTWISE_{{BROADCAST_OP}}{{ELTWISE_OP}}({{DATA_TYPE}}, M0, N0, {{lhs}}, {{rhs}}, {{dst}});
)_";
- if(is_root)
+ if (is_root)
{
// Calculate the destination indirect Y
code +=
-R"_(
+ R"_(
LOOP_UNROLLING(int, i, 0, 1, M0,
{
g_dst_indirect_y[i].v = (uint)min(g_ind_1 + i, (int)({{arg_dst}}_w * {{arg_dst}}_h) - 1);
@@ -139,7 +134,7 @@ R"_(
}
code +=
-R"_(
+ R"_(
}
//------------------ END KERNEL {{meta_kernel_id}} {{ELTWISE_OP}} ---------------------
)_";
@@ -147,28 +142,18 @@ R"_(
return code;
}
-void ClTemplateElementwiseBinary::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+void ClTemplateElementwiseBinary::declare_variables(GpuKernelVariableTable &vtable,
+ const ComponentGroup &comp_group) const
{
- vtable.declare_variable(
- comp_group,
- _lhs,
- GpuKernelArgumentInfo(common_tensor_type),
- "lhs");
-
- vtable.declare_variable(
- comp_group,
- _rhs,
- GpuKernelArgumentInfo(common_tensor_type),
- "rhs");
-
- vtable.declare_variable(
- comp_group,
- _dst,
- GpuKernelArgumentInfo(common_tensor_type),
- "dst");
+ vtable.declare_variable(comp_group, _lhs, GpuKernelArgumentInfo(common_tensor_type), "lhs");
+
+ vtable.declare_variable(comp_group, _rhs, GpuKernelArgumentInfo(common_tensor_type), "rhs");
+
+ vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(common_tensor_type), "dst");
}
-TagLUT ClTemplateElementwiseBinary::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+TagLUT ClTemplateElementwiseBinary::get_tag_lut(const GpuKernelVariableTable &vtable,
+ const ComponentGroup &comp_group) const
{
TagLUT lut{};
@@ -182,7 +167,7 @@ TagLUT ClTemplateElementwiseBinary::get_tag_lut(const GpuKernelVariableTable &vt
lut["dst"] = vtable.get_variable(_dst);
lut["arg_dst"] = vtable.get_variable(comp_group.get_any_dst_tensor());
- switch(_attributes.operation())
+ switch (_attributes.operation())
{
case Attributes::ElementwiseOp::Add:
lut["ELTWISE_OP"] = "ADD";
@@ -197,10 +182,10 @@ TagLUT ClTemplateElementwiseBinary::get_tag_lut(const GpuKernelVariableTable &vt
ARM_COMPUTE_ERROR("Arithmetic Operation not supported");
}
- ARM_COMPUTE_ERROR_ON(
- comp_group.is_intermediate_tensor(_lhs) && detail::have_different_dimensions(_lhs->tensor_shape(), _dst->tensor_shape(), 0));
- ARM_COMPUTE_ERROR_ON(
- comp_group.is_intermediate_tensor(_rhs) && detail::have_different_dimensions(_rhs->tensor_shape(), _dst->tensor_shape(), 0));
+ ARM_COMPUTE_ERROR_ON(comp_group.is_intermediate_tensor(_lhs) &&
+ detail::have_different_dimensions(_lhs->tensor_shape(), _dst->tensor_shape(), 0));
+ ARM_COMPUTE_ERROR_ON(comp_group.is_intermediate_tensor(_rhs) &&
+ detail::have_different_dimensions(_rhs->tensor_shape(), _dst->tensor_shape(), 0));
// Set broadcast parameters
// PRE: All tensors are broadcast-compatible
@@ -228,9 +213,7 @@ TagLUT ClTemplateElementwiseBinary::get_tag_lut(const GpuKernelVariableTable &vt
lut["rhs_m0"] = (rhs_broadcast_yz) ? "1" : "M0";
lut["rhs_start_ind_1"] = (rhs_broadcast_yz) ? "0" : "g_ind_1";
- lut["BROADCAST_OP"] = (lhs_broadcast_yz) ? "BROADCAST_LHS_X_" :
- (rhs_broadcast_yz) ? "BROADCAST_RHS_X_" :
- "";
+ lut["BROADCAST_OP"] = (lhs_broadcast_yz) ? "BROADCAST_LHS_X_" : (rhs_broadcast_yz) ? "BROADCAST_RHS_X_" : "";
return lut;
}
@@ -268,7 +251,7 @@ std::string ClTemplateElementwiseBinary::get_config_id() const
std::set<std::string> ClTemplateElementwiseBinary::get_headers_list() const
{
- return std::set<std::string>{ "helpers.h", "tile_helpers.h" };
+ return std::set<std::string>{"helpers.h", "tile_helpers.h"};
}
Window ClTemplateElementwiseBinary::get_window() const
@@ -279,8 +262,9 @@ Window ClTemplateElementwiseBinary::get_window() const
// Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) and upper dimensions unchanged
// This is in line with the collapsing convention used by operators like Conv2d
output_shape.collapse(2U, 1U);
- const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
- Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
+ const unsigned int num_elems_processed_per_iteration =
+ adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
+ Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
return win;
}
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h
index 8cca954efe..991c0eca44 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateElementwiseBinary.h
@@ -25,6 +25,7 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATEELEMENTWISEBINARY
#include "arm_compute/core/experimental/Types.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentElementwiseBinary.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
@@ -48,9 +49,7 @@ public:
* @param[in] tensors Tensor arguments to the components
* @param[in] attributes Component attributes
*/
- ClTemplateElementwiseBinary(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes);
+ ClTemplateElementwiseBinary(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
/** Prevent instances of this class from being copy constructed */
ClTemplateElementwiseBinary(const ClTemplateElementwiseBinary &elementwise) = delete;
/** Prevent instances of this class from being copied */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp
index a8d8d32b12..522c33a022 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.cpp
@@ -26,6 +26,7 @@
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/helpers/WindowHelpers.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "support/StringSupport.h"
@@ -38,16 +39,12 @@ namespace dynamic_fusion
{
namespace
{
- constexpr unsigned int serial_vector_size = 8;
+constexpr unsigned int serial_vector_size = 8;
} // namespace
ClTemplateLogits1DMaxShiftExpSum::ClTemplateLogits1DMaxShiftExpSum(ComponentId id,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes)
- : IGpuTemplateComponentWriter{ id, tensors },
- _src{},
- _sum{},
- _dst{},
- _attributes{ attributes }
+ : IGpuTemplateComponentWriter{id, tensors}, _src{}, _sum{}, _dst{}, _attributes{attributes}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_sum = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
@@ -79,7 +76,7 @@ std::string ClTemplateLogits1DMaxShiftExpSum::get_component_code(const Component
const bool beta_defined = (_attributes.beta() != 1.f);
- if(beta_defined)
+ if (beta_defined)
{
code += R"_(
VEC_TYPE beta = (VEC_TYPE){{BETA}};
@@ -91,7 +88,7 @@ std::string ClTemplateLogits1DMaxShiftExpSum::get_component_code(const Component
const unsigned int vector_size = adjust_vec_size(_serial_vector_size, reduction_dim_size);
const bool non_multiple_of_n0 = ((reduction_dim_size % vector_size) != 0);
- if(non_multiple_of_n0)
+ if (non_multiple_of_n0)
{
code += R"_(
VEC_TYPE data = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)src_addr);
@@ -111,19 +108,19 @@ std::string ClTemplateLogits1DMaxShiftExpSum::get_component_code(const Component
VEC_TYPE sum1D = 0;
)_";
- if(non_multiple_of_n0)
+ if (non_multiple_of_n0)
{
code += R"_(
data -= max_val;
)_";
- if(beta_defined)
+ if (beta_defined)
{
code += R"_(
data *= beta;
)_";
}
- if(_attributes.is_log_softmax())
+ if (_attributes.is_log_softmax())
{
code += R"_(
VSTORE_PARTIAL(N0, PARTIAL_N0)
@@ -153,14 +150,14 @@ std::string ClTemplateLogits1DMaxShiftExpSum::get_component_code(const Component
data -= max_val;
)_";
- if(beta_defined)
+ if (beta_defined)
{
code += R"_(
data *= beta;
)_";
}
- if(_attributes.is_log_softmax())
+ if (_attributes.is_log_softmax())
{
code += R"_(
VSTORE(N0)
@@ -191,28 +188,18 @@ std::string ClTemplateLogits1DMaxShiftExpSum::get_component_code(const Component
return code;
}
-void ClTemplateLogits1DMaxShiftExpSum::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+void ClTemplateLogits1DMaxShiftExpSum::declare_variables(GpuKernelVariableTable &vtable,
+ const ComponentGroup &comp_group) const
{
- vtable.declare_variable(
- comp_group,
- _src,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D),
- "src");
-
- vtable.declare_variable(
- comp_group,
- _sum,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D),
- "sum");
-
- vtable.declare_variable(
- comp_group,
- _dst,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D),
- "dst");
+ vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "src");
+
+ vtable.declare_variable(comp_group, _sum, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "sum");
+
+ vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "dst");
}
-TagLUT ClTemplateLogits1DMaxShiftExpSum::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
+TagLUT ClTemplateLogits1DMaxShiftExpSum::get_tag_lut(const GpuKernelVariableTable &vtable,
+ const ComponentGroup &comp_group) const
{
ARM_COMPUTE_UNUSED(comp_group);
@@ -241,8 +228,8 @@ CLBuildOptions ClTemplateLogits1DMaxShiftExpSum::get_build_options(const Compone
ARM_COMPUTE_UNUSED(comp_group);
CLBuildOptions build_opts{};
- const unsigned int reduction_dim_size = _src->dimension(0);
- const unsigned int vector_size = adjust_vec_size(serial_vector_size, reduction_dim_size);
+ const unsigned int reduction_dim_size = _src->dimension(0);
+ const unsigned int vector_size = adjust_vec_size(serial_vector_size, reduction_dim_size);
build_opts.add_option("-DN0=" + support::cpp11::to_string(vector_size));
build_opts.add_option("-DPARTIAL_N0=" + support::cpp11::to_string((reduction_dim_size % vector_size)));
@@ -264,7 +251,7 @@ std::string ClTemplateLogits1DMaxShiftExpSum::get_config_id() const
std::set<std::string> ClTemplateLogits1DMaxShiftExpSum::get_headers_list() const
{
- return std::set<std::string>{ "helpers.h", "tile_helpers.h" };
+ return std::set<std::string>{"helpers.h", "tile_helpers.h"};
}
Window ClTemplateLogits1DMaxShiftExpSum::get_window() const
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h
index 5d232c0cf2..ac9ddaa9d4 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DMaxShiftExpSum.h
@@ -46,7 +46,9 @@ public:
* @param[in] tensors Tensor arguments to the components
* @param[in] attributes Component attributes
*/
- ClTemplateLogits1DMaxShiftExpSum(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
+ ClTemplateLogits1DMaxShiftExpSum(ComponentId id,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes);
/** Prevent instances of this class from being copy constructed */
ClTemplateLogits1DMaxShiftExpSum(const ClTemplateLogits1DMaxShiftExpSum &) = delete;
/** Prevent instances of this class from being copied */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp
index 056e570a25..7d7c3e6673 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.cpp
@@ -25,6 +25,7 @@
#include "src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateLogits1DNorm.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
+
#include "src/core/helpers/WindowHelpers.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "support/StringSupport.h"
@@ -38,11 +39,7 @@ namespace dynamic_fusion
ClTemplateLogits1DNorm::ClTemplateLogits1DNorm(ComponentId id,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes)
- : IGpuTemplateComponentWriter{ id, tensors },
- _src{},
- _sum{},
- _dst{},
- _attributes{ attributes }
+ : IGpuTemplateComponentWriter{id, tensors}, _src{}, _sum{}, _dst{}, _attributes{attributes}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_sum = this->tensors().get_const_tensor(TensorType::ACL_SRC_1);
@@ -76,7 +73,7 @@ std::string ClTemplateLogits1DNorm::get_component_code(const ComponentGroup &com
data0 = VLOAD(N0)(0, (__global {{DATA_TYPE}} *)src_addr);
)_";
- if(_attributes.is_log_softmax())
+ if (_attributes.is_log_softmax())
{
code += R"_(
sum_val = log(sum_val);
@@ -101,23 +98,11 @@ std::string ClTemplateLogits1DNorm::get_component_code(const ComponentGroup &com
void ClTemplateLogits1DNorm::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
{
- vtable.declare_variable(
- comp_group,
- _src,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D),
- "src");
-
- vtable.declare_variable(
- comp_group,
- _sum,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D),
- "sum");
-
- vtable.declare_variable(
- comp_group,
- _dst,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D),
- "dst");
+ vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "src");
+
+ vtable.declare_variable(comp_group, _sum, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "sum");
+
+ vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_3D), "dst");
}
TagLUT ClTemplateLogits1DNorm::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
@@ -168,14 +153,14 @@ std::string ClTemplateLogits1DNorm::get_config_id() const
std::set<std::string> ClTemplateLogits1DNorm::get_headers_list() const
{
- return std::set<std::string>{ "helpers.h", "tile_helpers.h" };
+ return std::set<std::string>{"helpers.h", "tile_helpers.h"};
}
Window ClTemplateLogits1DNorm::get_window() const
{
ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
constexpr unsigned int serial_vector_size = 16;
- const unsigned int vector_size = adjust_vec_size(serial_vector_size, _src->dimension(0));
+ const unsigned int vector_size = adjust_vec_size(serial_vector_size, _src->dimension(0));
Window win = calculate_max_window(*_src, Steps(vector_size));
return win.collapse(win, Window::DimZ);
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp
index 34840c2100..ebb0374501 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.cpp
@@ -23,14 +23,13 @@
*/
#include "ClTemplatePool2d.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
-#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
-
-#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
+#include "arm_compute/core/utils/misc/ShapeCalculator.h"
#include "arm_compute/core/utils/StringUtils.h"
-#include "src/core/helpers/WindowHelpers.h"
+#include "src/core/helpers/WindowHelpers.h"
+#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentDirectConv2d.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "support/StringSupport.h"
namespace arm_compute
@@ -50,11 +49,7 @@ ClTemplatePool2d::ClTemplatePool2d(ComponentId id,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes,
const Settings &settings)
- : IGpuTemplateComponentWriter{ id, tensors },
- _src{},
- _dst{},
- _attributes{ attributes },
- _settings{ settings }
+ : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}, _attributes{attributes}, _settings{settings}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
@@ -71,7 +66,7 @@ std::string ClTemplatePool2d::get_component_code(const ComponentGroup &comp_grou
ARM_COMPUTE_UNUSED(comp_group);
// Condition to use 2x2 optimized kernel
- if(_attributes.pool_size() == Size2D(2, 2))
+ if (_attributes.pool_size() == Size2D(2, 2))
{
return get_2x2_kernel_code();
}
@@ -83,11 +78,13 @@ std::string ClTemplatePool2d::get_component_code(const ComponentGroup &comp_grou
std::string ClTemplatePool2d::get_MxN_kernel_code() const
{
- const auto pool_type = _attributes.pool_type();
- const bool fp_mixed_precision = (_src->data_type() == DataType::F16) && _settings.mixed_precision() && pool_type != PoolingType::MAX;
+ const auto pool_type = _attributes.pool_type();
+ const bool fp_mixed_precision =
+ (_src->data_type() == DataType::F16) && _settings.mixed_precision() && pool_type != PoolingType::MAX;
// Define pool op macro.
- std::string pool_op = (pool_type == PoolingType::AVG) ? R"_(#define POOL_OP(x,y) ((x) + (y)))_" : R"_(#define POOL_OP(x,y) (fmax((x), (y))) )_";
+ std::string pool_op = (pool_type == PoolingType::AVG) ? R"_(#define POOL_OP(x,y) ((x) + (y)))_"
+ : R"_(#define POOL_OP(x,y) (fmax((x), (y))) )_";
// Kernel start
// Note: If C is not multiple of N0, we shift back of PARTIAL_N0 elements to compute the leftover elements for get_global_id(0) == 0
@@ -129,7 +126,7 @@ std::string ClTemplatePool2d::get_MxN_kernel_code() const
)_";
// Determine filter size depending on if padding is excluded or not
- if(_attributes.exclude_padding())
+ if (_attributes.exclude_padding())
{
code += R"_(
const int filter_size = (pool_y_e - pool_y_s) * (pool_x_e - pool_x_s);
@@ -144,7 +141,8 @@ std::string ClTemplatePool2d::get_MxN_kernel_code() const
// Loop through pool size
// if global pooling
- if(_attributes.pool_size().x() == _src->dimension(width_idx) && _attributes.pool_size().y() == _src->dimension(height_idx))
+ if (_attributes.pool_size().x() == _src->dimension(width_idx) &&
+ _attributes.pool_size().y() == _src->dimension(height_idx))
{
// Begin loop
code += R"_(
@@ -173,7 +171,7 @@ std::string ClTemplatePool2d::get_MxN_kernel_code() const
// if condition inside loop - use 32bit acc if mixed_precision.
// End loop through pooling section.
- if(fp_mixed_precision)
+ if (fp_mixed_precision)
{
// In case of FP_MIXED_PRECISION, ACC_DATA_TYPE is != DATA_TYPE
code += R"_(
@@ -194,7 +192,7 @@ std::string ClTemplatePool2d::get_MxN_kernel_code() const
}
// For Pool AVG ONLY, divide pool output by filter size
- if(pool_type == PoolingType::AVG)
+ if (pool_type == PoolingType::AVG)
{
code += R"_(
res0 /= (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0))filter_size;
@@ -202,7 +200,7 @@ std::string ClTemplatePool2d::get_MxN_kernel_code() const
}
// If mixed precision convert datatype before storing. Then end kernel.
- if(fp_mixed_precision)
+ if (fp_mixed_precision)
{
code += R"_(
VEC_DATA_TYPE({{DATA_TYPE}}, N0)
@@ -228,9 +226,11 @@ std::string ClTemplatePool2d::get_MxN_kernel_code() const
std::string ClTemplatePool2d::get_2x2_kernel_code() const
{
- const auto pool_type = _attributes.pool_type();
- const bool fp_mixed_precision = (_src->data_type() == DataType::F16) && _settings.mixed_precision() && pool_type != PoolingType::MAX;
- std::string pool_op = (pool_type == PoolingType::AVG) ? R"_(#define POOL_OP(x,y) ((x) + (y)))_" : R"_(#define POOL_OP(x,y) (fmax((x), (y))) )_";
+ const auto pool_type = _attributes.pool_type();
+ const bool fp_mixed_precision =
+ (_src->data_type() == DataType::F16) && _settings.mixed_precision() && pool_type != PoolingType::MAX;
+ std::string pool_op = (pool_type == PoolingType::AVG) ? R"_(#define POOL_OP(x,y) ((x) + (y)))_"
+ : R"_(#define POOL_OP(x,y) (fmax((x), (y))) )_";
std::string code = R"_(
//------------------ START KERNEL {{meta_kernel_id}} ---------------------
@@ -274,7 +274,7 @@ std::string ClTemplatePool2d::get_2x2_kernel_code() const
REPEAT_VAR_INIT_TO_CONST(4, VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0), data, 0);
)_";
- if(fp_mixed_precision)
+ if (fp_mixed_precision)
{
// In case of FP_MIXED_PRECISION, ACC_DATA_TYPE is != DATA_TYPE
code += R"_(
@@ -294,7 +294,7 @@ std::string ClTemplatePool2d::get_2x2_kernel_code() const
)_";
}
- if(pool_type != PoolingType::MAX)
+ if (pool_type != PoolingType::MAX)
{
// Make invalid the values loaded if the x or y coordinate was clamped (out-of-bound)
code += R"_(
@@ -321,10 +321,10 @@ std::string ClTemplatePool2d::get_2x2_kernel_code() const
res0 = POOL_OP(res0, data3);
)_";
- if(pool_type == PoolingType::AVG)
+ if (pool_type == PoolingType::AVG)
{
// If avg pooling divide result accordingly.
- if(_attributes.exclude_padding())
+ if (_attributes.exclude_padding())
{
code += R"_(
res0 /= (VEC_DATA_TYPE({{ACC_DATA_TYPE}}, N0))filter_size;
@@ -339,7 +339,7 @@ std::string ClTemplatePool2d::get_2x2_kernel_code() const
}
// Store result
- if(fp_mixed_precision)
+ if (fp_mixed_precision)
{
code += R"_(
VEC_DATA_TYPE({{DATA_TYPE}}, N0)
@@ -365,17 +365,11 @@ std::string ClTemplatePool2d::get_2x2_kernel_code() const
void ClTemplatePool2d::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
{
- vtable.declare_variable(
- comp_group,
- _src,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "src");
-
- vtable.declare_variable(
- comp_group,
- _dst,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "dst");
+ vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "src");
+
+ vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "dst");
}
TagLUT ClTemplatePool2d::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
@@ -391,12 +385,15 @@ TagLUT ClTemplatePool2d::get_tag_lut(const GpuKernelVariableTable &vtable, const
lut["meta_kernel_id"] = id();
// Retrieve relevant data
- const auto padding = _attributes.pad();
- const auto stride = _attributes.stride();
- const auto pool_size = _attributes.pool_size();
- const auto data_type = _src->data_type();
- const auto use_fp_mixed_precision = (_src->data_type() == DataType::F16) && _settings.mixed_precision() && _attributes.pool_type() != PoolingType::MAX;
- const std::string max_initial_value = _settings.use_inf_as_limit() ? "(-INFINITY)" : float_to_string_with_full_precision(std::numeric_limits<float>::lowest());
+ const auto padding = _attributes.pad();
+ const auto stride = _attributes.stride();
+ const auto pool_size = _attributes.pool_size();
+ const auto data_type = _src->data_type();
+ const auto use_fp_mixed_precision = (_src->data_type() == DataType::F16) && _settings.mixed_precision() &&
+ _attributes.pool_type() != PoolingType::MAX;
+ const std::string max_initial_value =
+ _settings.use_inf_as_limit() ? "(-INFINITY)"
+ : float_to_string_with_full_precision(std::numeric_limits<float>::lowest());
// pool specific
lut["STRIDE_X"] = stride.x();
@@ -407,7 +404,8 @@ TagLUT ClTemplatePool2d::get_tag_lut(const GpuKernelVariableTable &vtable, const
lut["POOL_SIZE_Y"] = pool_size.height;
// Datatypes and variables
- lut["ACC_DATA_TYPE"] = get_cl_type_from_data_type((use_fp_mixed_precision) ? (DataType::F32) : (data_type)); // Type of accumulators to use.
+ lut["ACC_DATA_TYPE"] = get_cl_type_from_data_type(
+ (use_fp_mixed_precision) ? (DataType::F32) : (data_type)); // Type of accumulators to use.
lut["DATA_TYPE"] = get_cl_type_from_data_type(data_type);
lut["SRC_WIDTH"] = _src->dimension(width_idx);
lut["SRC_HEIGHT"] = _src->dimension(height_idx);
@@ -454,14 +452,14 @@ std::string ClTemplatePool2d::get_config_id() const
std::set<std::string> ClTemplatePool2d::get_headers_list() const
{
- return std::set<std::string>{ "helpers.h", "tile_helpers.h", "repeat.h" };
+ return std::set<std::string>{"helpers.h", "tile_helpers.h", "repeat.h"};
}
Window ClTemplatePool2d::get_window() const
{
ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
const auto output_shape = _dst->tensor_shape();
- const unsigned int vec_size = adjust_vec_size(((_dst->data_type() == DataType::F32) ? 2 : 4), _dst->dimension(0));
+ const unsigned int vec_size = adjust_vec_size(((_dst->data_type() == DataType::F32) ? 2 : 4), _dst->dimension(0));
// Create and configure kernel window
auto win = calculate_max_window(output_shape, Steps(vec_size));
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h
index ef1c100f44..d1d3c01669 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplatePool2d.h
@@ -27,6 +27,7 @@
#include "arm_compute/core/experimental/Types.h"
#include "arm_compute/dynamic_fusion/sketch/attributes/Pool2dAttributes.h"
#include "arm_compute/dynamic_fusion/sketch/gpu/operators/GpuPool2d.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentPool2d.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp
index 8b50f1e209..c882353fcb 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.cpp
@@ -25,6 +25,7 @@
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/helpers/WindowHelpers.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
@@ -36,11 +37,8 @@ namespace dynamic_fusion
{
constexpr unsigned int vector_size_byte_opencl = 16;
-ClTemplateReshape::ClTemplateReshape(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors)
- : IGpuTemplateComponentWriter{ id, tensors },
- _src{},
- _dst{}
+ClTemplateReshape::ClTemplateReshape(ComponentId id, const ArgumentPack<ITensorInfo> &tensors)
+ : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
@@ -97,23 +95,17 @@ TILE(uint, M0, 1, g_dst_indirect_y);
void ClTemplateReshape::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
{
- vtable.declare_variable(
- comp_group,
- _src,
- GpuKernelArgumentInfo(common_tensor_type), // GpuKernelArgumentInfo::Type::Image_3D
- "src");
-
- vtable.declare_variable(
- comp_group,
- _dst,
- GpuKernelArgumentInfo(common_tensor_type),
- "dst");
+ vtable.declare_variable(comp_group, _src,
+ GpuKernelArgumentInfo(common_tensor_type), // GpuKernelArgumentInfo::Type::Image_3D
+ "src");
+
+ vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(common_tensor_type), "dst");
}
TagLUT ClTemplateReshape::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
{
ARM_COMPUTE_UNUSED(comp_group);
- TagLUT lut{};
+ TagLUT lut{};
// Arguments and global shared variables
lut["src"] = vtable.get_variable(_src);
@@ -153,7 +145,7 @@ std::string ClTemplateReshape::get_config_id() const
std::set<std::string> ClTemplateReshape::get_headers_list() const
{
- return std::set<std::string>{ "helpers.h", "tile_helpers.h" };
+ return std::set<std::string>{"helpers.h", "tile_helpers.h"};
}
Window ClTemplateReshape::get_window() const
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h
index 56b6585b61..838a21db6d 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateReshape.h
@@ -25,6 +25,7 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATERESHAPE
#include "arm_compute/core/experimental/Types.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/cl/ClComponentReshape.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
@@ -42,8 +43,7 @@ public:
* @param[in] id Component id
* @param[in] tensors Tensor arguments to the components
*/
- ClTemplateReshape(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors);
+ ClTemplateReshape(ComponentId id, const ArgumentPack<ITensorInfo> &tensors);
/** Prevent instances of this class from being copy constructed */
ClTemplateReshape(const ClTemplateReshape &reshape) = delete;
/** Prevent instances of this class from being copied */
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp
index aaed1d990d..846c712ceb 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateResize.cpp
@@ -27,6 +27,7 @@
#include "arm_compute/core/Utils.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/StringUtils.h"
+
#include "src/core/helpers/WindowHelpers.h"
#include "src/core/utils/ScaleUtils.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
@@ -37,8 +38,10 @@ namespace experimental
{
namespace dynamic_fusion
{
-ClTemplateResize::ClTemplateResize(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const ClTemplateResize::Attributes &attributes)
- : IGpuTemplateComponentWriter{ id, tensors }, _src{}, _dst{}, _attributes{ attributes }
+ClTemplateResize::ClTemplateResize(ComponentId id,
+ const ArgumentPack<ITensorInfo> &tensors,
+ const ClTemplateResize::Attributes &attributes)
+ : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}, _attributes{attributes}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
@@ -63,9 +66,9 @@ TILE(uint, 1, 1, g_dst_indirect_y);
const int bout = g_ind_2 / {{arg_dst}}_h;
)_";
- if(_attributes.interpolation_policy() == InterpolationPolicy::NEAREST_NEIGHBOR)
+ if (_attributes.interpolation_policy() == InterpolationPolicy::NEAREST_NEIGHBOR)
{
- if(_attributes.sampling_policy() == SamplingPolicy::TOP_LEFT)
+ if (_attributes.sampling_policy() == SamplingPolicy::TOP_LEFT)
{
code += R"_(
float xi_f = (g_ind_1 * {{SCALE_X}});
@@ -80,7 +83,7 @@ TILE(uint, 1, 1, g_dst_indirect_y);
)_";
}
- if(_attributes.align_corners())
+ if (_attributes.align_corners())
{
code += R"_(
xi_f = round(xi_f);
@@ -95,9 +98,9 @@ TILE(uint, 1, 1, g_dst_indirect_y);
T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi0, xi0, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, {{dst}});
)_";
}
- else if(_attributes.interpolation_policy() == InterpolationPolicy::BILINEAR)
+ else if (_attributes.interpolation_policy() == InterpolationPolicy::BILINEAR)
{
- if(_attributes.sampling_policy() == SamplingPolicy::TOP_LEFT)
+ if (_attributes.sampling_policy() == SamplingPolicy::TOP_LEFT)
{
code += R"_(
float xi_f = (g_ind_1 * {{SCALE_X}});
@@ -137,7 +140,7 @@ TILE(uint, 1, 1, g_dst_indirect_y);
T_LOAD_NHWC_WITH_DILATION({{SRC_DATA_TYPE}}, 1, 1, N0, {{SRC_TENSOR_TYPE}}, {{src}}, bout, yi1, xi1, g_ind_0, {{src}}_w, {{src}}_h, 1, 1, false, in11);
)_";
- if(is_data_type_float(_src->data_type()))
+ if (is_data_type_float(_src->data_type()))
{
code += R"_(
const {{SRC_DATA_TYPE}} a = ({{SRC_DATA_TYPE}})(xi_f - (float)xi);
@@ -158,9 +161,9 @@ TILE(uint, 1, 1, g_dst_indirect_y);
const float b1 = (1.f - a1);
{{dst}}[0].v = CONVERT_SAT(
- (CONVERT(in00[0].v, VEC_DATA_TYPE(float, N0)) * b * b1) +
+ (CONVERT(in00[0].v, VEC_DATA_TYPE(float, N0)) * b * b1) +
(CONVERT(in01[0].v, VEC_DATA_TYPE(float, N0)) * a * b1) +
- (CONVERT(in10[0].v, VEC_DATA_TYPE(float, N0)) * b * a1) +
+ (CONVERT(in10[0].v, VEC_DATA_TYPE(float, N0)) * b * a1) +
(CONVERT(in11[0].v, VEC_DATA_TYPE(float, N0)) * a * a1), VEC_DATA_TYPE({{DST_DATA_TYPE}}, N0));
)_";
}
@@ -179,22 +182,18 @@ TILE(uint, 1, 1, g_dst_indirect_y);
return code;
}
-void ClTemplateResize::declare_variables(GpuKernelVariableTable &vtable, const IGpuTemplateComponentWriter::ComponentGroup &comp_group) const
+void ClTemplateResize::declare_variables(GpuKernelVariableTable &vtable,
+ const IGpuTemplateComponentWriter::ComponentGroup &comp_group) const
{
- vtable.declare_variable(
- comp_group,
- _src,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "src");
-
- vtable.declare_variable(
- comp_group,
- _dst,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "dst");
+ vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "src");
+
+ vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "dst");
}
-TagLUT ClTemplateResize::get_tag_lut(const GpuKernelVariableTable &vtable, const IGpuTemplateComponentWriter::ComponentGroup &comp_group) const
+TagLUT ClTemplateResize::get_tag_lut(const GpuKernelVariableTable &vtable,
+ const IGpuTemplateComponentWriter::ComponentGroup &comp_group) const
{
TagLUT lut{};
@@ -212,8 +211,10 @@ TagLUT ClTemplateResize::get_tag_lut(const GpuKernelVariableTable &vtable, const
lut["DST_DATA_TYPE"] = get_cl_type_from_data_type(_dst->data_type());
lut["CONSTANT_VALUE"] = string_from_pixel_value(0, _src->data_type());
- const float scale_x = scale_utils::calculate_resize_ratio(_src->dimension(1), _dst->dimension(1), _attributes.align_corners());
- const float scale_y = scale_utils::calculate_resize_ratio(_src->dimension(2), _dst->dimension(2), _attributes.align_corners());
+ const float scale_x =
+ scale_utils::calculate_resize_ratio(_src->dimension(1), _dst->dimension(1), _attributes.align_corners());
+ const float scale_y =
+ scale_utils::calculate_resize_ratio(_src->dimension(2), _dst->dimension(2), _attributes.align_corners());
lut["SCALE_X"] = float_to_string_with_full_precision(scale_x);
lut["SCALE_Y"] = float_to_string_with_full_precision(scale_y);
@@ -242,7 +243,8 @@ std::string ClTemplateResize::get_config_id() const
std::string config_id{};
config_id += "resize_";
- config_id += (_attributes.interpolation_policy() == InterpolationPolicy::NEAREST_NEIGHBOR ? "NEAREST_NEIGHBOR" : "");
+ config_id +=
+ (_attributes.interpolation_policy() == InterpolationPolicy::NEAREST_NEIGHBOR ? "NEAREST_NEIGHBOR" : "");
config_id += (_attributes.interpolation_policy() == InterpolationPolicy::BILINEAR ? "BILINEAR" : "");
config_id += "_";
config_id += (_attributes.sampling_policy() == SamplingPolicy::CENTER ? "center" : "topleft");
@@ -260,7 +262,7 @@ std::string ClTemplateResize::get_config_id() const
std::set<std::string> ClTemplateResize::get_headers_list() const
{
- return std::set<std::string>{ "helpers.h", "tile_helpers.h" };
+ return std::set<std::string>{"helpers.h", "tile_helpers.h"};
}
Window ClTemplateResize::get_window() const
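For context on the wrapped calculate_resize_ratio calls above: SCALE_X/SCALE_Y map an output coordinate back into the source image. A minimal sketch of that ratio, assuming the usual align-corners convention; this is an illustrative re-statement, not the library's actual implementation:

    #include <cstddef>

    // Sketch: with align_corners the end points are pinned, so one sample
    // interval is dropped from both extents before taking the ratio.
    inline float resize_ratio_sketch(size_t src_size, size_t dst_size, bool align_corners)
    {
        const size_t offset = align_corners ? 1 : 0;
        return static_cast<float>(src_size - offset) / static_cast<float>(dst_size - offset);
    }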
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp
index 217214ced3..d0ec91e0a9 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.cpp
@@ -32,7 +32,7 @@ namespace experimental
namespace dynamic_fusion
{
ClTemplateStore::ClTemplateStore(ComponentId id, const ArgumentPack<ITensorInfo> &tensors)
- : IGpuTemplateComponentWriter{ id, tensors }, _src{}, _dst{}
+ : IGpuTemplateComponentWriter{id, tensors}, _src{}, _dst{}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
@@ -61,16 +61,10 @@ std::string ClTemplateStore::get_component_code(const ComponentGroup &comp_group
void ClTemplateStore::declare_variables(GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
{
- vtable.declare_variable(
- comp_group,
- _src,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "src");
- vtable.declare_variable(
- comp_group,
- _dst,
- GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
- "dst");
+ vtable.declare_variable(comp_group, _src, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "src");
+ vtable.declare_variable(comp_group, _dst, GpuKernelArgumentInfo(GpuKernelArgumentInfo::Type::Tensor_4D_t_Buffer),
+ "dst");
}
TagLUT ClTemplateStore::get_tag_lut(const GpuKernelVariableTable &vtable, const ComponentGroup &comp_group) const
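Both ClTemplateResize above and ClTemplateStore here funnel their tensors through the same GpuKernelVariableTable::declare_variable call. A toy model of what such a table provides, assuming one kernel variable per tensor id with first-declaration-wins de-duplication (the naming scheme is illustrative, not ComputeLibrary's):

    #include <map>
    #include <string>

    // Toy model: components of a fused kernel that declare the same tensor
    // end up sharing a single kernel argument, keyed by the tensor's id.
    struct MiniVariableTable
    {
        std::map<int, std::string> vars{}; // tensor id -> variable name
        void declare_variable(int tensor_id, const std::string &alias)
        {
            // try_emplace keeps the first declaration and ignores repeats.
            vars.try_emplace(tensor_id, alias + "_t" + std::to_string(tensor_id));
        }
    };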
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h
index 3f97a82204..b8c82ceadd 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateStore.h
@@ -25,6 +25,7 @@
#define SRC_DYNAMIC_FUSION_SKETCH_GPU_TEMPLATE_WRITER_CL_CLTEMPLATESTORE
#include "arm_compute/core/experimental/Types.h"
+
#include "src/dynamic_fusion/sketch/gpu/template_writer/GpuKernelVariableTable.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
diff --git a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp
index eda15f1d95..d3d7c8db83 100644
--- a/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp
+++ b/src/dynamic_fusion/sketch/gpu/template_writer/cl/ClTemplateWriter.cpp
@@ -24,6 +24,7 @@
#include "ClTemplateWriter.h"
#include "arm_compute/core/CL/CLKernelLibrary.h"
+
#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
#include "src/dynamic_fusion/sketch/gpu/template_writer/IGpuTemplateComponentWriter.h"
@@ -39,11 +40,11 @@ std::string ClTemplateWriter::replace_tags(const std::string &code_template, con
std::string replaced_code = "";
bool scanning_pattern = false;
std::string pattern_found = "";
- for(size_t i = 0; i < code_template.size() - 1; ++i)
+ for (size_t i = 0; i < code_template.size() - 1; ++i)
{
- if(!scanning_pattern)
+ if (!scanning_pattern)
{
- if(code_template[i] == '{' && code_template[i + 1] == '{')
+ if (code_template[i] == '{' && code_template[i + 1] == '{')
{
i += 1;
scanning_pattern = true;
@@ -56,7 +57,7 @@ std::string ClTemplateWriter::replace_tags(const std::string &code_template, con
}
else
{
- if(code_template[i] == '}' && code_template[i + 1] == '}')
+ if (code_template[i] == '}' && code_template[i + 1] == '}')
{
i += 1;
scanning_pattern = false;
@@ -76,8 +77,7 @@ std::string ClTemplateWriter::replace_tags(const std::string &code_template, con
ClTemplateWriter::~ClTemplateWriter()
{
}
-ClTemplateWriter::ClTemplateWriter(const GpuKernelComponentGroup &components)
- : _components{ components }
+ClTemplateWriter::ClTemplateWriter(const GpuKernelComponentGroup &components) : _components{components}
{
}
std::string ClTemplateWriter::get_name()
@@ -91,7 +91,7 @@ std::string ClTemplateWriter::get_code()
std::string ClTemplateWriter::get_config_id()
{
std::string config_id = get_name();
- for(const auto &comp : _components)
+ for (const auto &comp : _components)
{
config_id += "--" + comp->template_writer()->get_config_id() + "--";
}
@@ -103,7 +103,7 @@ CLBuildOptions ClTemplateWriter::get_build_options()
{
CLBuildOptions build_opts{};
- for(const auto &comp : _components)
+ for (const auto &comp : _components)
{
build_opts.add_options(comp->template_writer()->get_build_options(_components).options());
}
@@ -122,11 +122,9 @@ std::map<ITensorInfo::Id, GpuKernelArgument> ClTemplateWriter::get_tensors()
{
// Assemble GpuKernelArguments
std::map<ITensorInfo::Id, GpuKernelArgument> tensors;
- for(const auto t : _components.get_argument_tensors())
+ for (const auto t : _components.get_argument_tensors())
{
- tensors.emplace(
- t->id(),
- GpuKernelArgument{ *t, _vtable.get_variable(t).kernel_argument_info });
+ tensors.emplace(t->id(), GpuKernelArgument{*t, _vtable.get_variable(t).kernel_argument_info});
}
return tensors;
}
@@ -141,22 +139,24 @@ std::string ClTemplateWriter::write_code()
std::vector<std::string> component_codes{}; // vector because order matters
// Pass 1: Declare all kernel variables
- for(auto &component : _components)
+ for (auto &component : _components)
{
component->template_writer()->declare_variables(_vtable, _components);
}
// Pass 2: Generate component codes
- for(auto &component : _components)
+ for (auto &component : _components)
{
const auto component_writer = component->template_writer();
auto curr_headers_list = component_writer->get_headers_list();
auto curr_additional_macros = component_writer->get_additional_macros();
auto curr_component_code = component_writer->get_component_code(_components);
- const auto var_lut = component_writer->get_tag_lut(_vtable, _components); // Ideally can be merged with get_component_code once we have finer-grained code generation technique
+ const auto var_lut = component_writer->get_tag_lut(
+ _vtable,
+ _components); // Ideally can be merged with get_component_code once we have finer-grained code generation technique
component_codes.push_back(replace_tags(curr_component_code, var_lut));
headers_list.insert(curr_headers_list.begin(), curr_headers_list.end());
- if(!additional_macros.empty()) // Some components might not have any
+ if (!additional_macros.empty()) // Some components might not have any
{
additional_macros.insert(replace_tags(curr_additional_macros, var_lut));
}
@@ -165,7 +165,7 @@ std::string ClTemplateWriter::write_code()
// Step 3: Assemble the data gathered by traversing the graph into the string "code"
std::string code = "";
- for(auto &header : headers_list)
+ for (auto &header : headers_list)
{
#if defined(EMBEDDED_KERNELS)
code += CLKernelLibrary::get().get_program(header).first;
@@ -174,16 +174,14 @@ std::string ClTemplateWriter::write_code()
#endif // defined(EMBEDDED_KERNELS)
}
- for(auto &macros : additional_macros)
+ for (auto &macros : additional_macros)
{
code += macros;
}
auto arguments = _components.get_argument_tensors();
- std::sort(arguments.begin(), arguments.end(), [](const ITensorInfo * l, const ITensorInfo * r)
- {
- return l->id() < r->id();
- });
+ std::sort(arguments.begin(), arguments.end(),
+ [](const ITensorInfo *l, const ITensorInfo *r) { return l->id() < r->id(); });
code += write_kernel_signature(_vtable.get_variable_list(arguments));
code += "\n{\n\n";
@@ -198,7 +196,7 @@ std::string ClTemplateWriter::write_code()
tiles_ss << " //------------------ START TILE DECLARATION ---------------------\n";
- for(auto tile : tiles)
+ for (auto tile : tiles)
{
const auto var = _vtable.get_variable(tile);
const auto data_type = get_cl_type_from_data_type(tile->data_type());
@@ -212,7 +210,7 @@ std::string ClTemplateWriter::write_code()
code += tiles_ss.str();
}
- for(const auto &component_code : component_codes)
+ for (const auto &component_code : component_codes)
{
code += component_code;
code += "\n";
@@ -231,7 +229,8 @@ std::string ClTemplateWriter::write_global_section() const
auto leftover_w = dst_w % tile_w;
std::string code = "";
- code += std::string(" int g_ind_0 = GET_SPATIAL_IDX(0, ") + std::to_string(tile_w) + ", " + std::to_string(leftover_w) + ");\n";
+ code += std::string(" int g_ind_0 = GET_SPATIAL_IDX(0, ") + std::to_string(tile_w) + ", " +
+ std::to_string(leftover_w) + ");\n";
code += std::string(" int g_ind_1 = GET_SPATIAL_IDX(1, ") + std::to_string(tile_h) + ", " + "0);\n";
code += std::string(" int g_ind_2 = GET_SPATIAL_IDX(2, 1, 0);\n\n");
@@ -243,7 +242,7 @@ std::string ClTemplateWriter::write_global_section() const
std::string ClTemplateWriter::write_argument_declaration(const GpuKernelVariableTable::TensorVariable &var) const
{
std::string code;
- switch(var.kernel_argument_info.type)
+ switch (var.kernel_argument_info.type)
{
case GpuKernelArgumentInfo::Type::Vector:
{
@@ -293,11 +292,11 @@ std::string ClTemplateWriter::write_kernel_signature(const GpuKernelVariableTabl
{
std::string code = "\n__kernel void " + write_kernel_name() + "(";
- for(int i = 0; i < static_cast<int>(argument_list.size()) - 1; ++i)
+ for (int i = 0; i < static_cast<int>(argument_list.size()) - 1; ++i)
{
code += write_argument_declaration(argument_list[i]) + ",";
}
- if(static_cast<int>(argument_list.size()) - 1 >= 0)
+ if (static_cast<int>(argument_list.size()) - 1 >= 0)
{
code += write_argument_declaration(argument_list[argument_list.size() - 1]);
}
@@ -308,12 +307,12 @@ std::string ClTemplateWriter::write_kernel_signature(const GpuKernelVariableTabl
}
std::string ClTemplateWriter::write_kernel_name() const
{
- if(_components.empty())
+ if (_components.empty())
{
return "empty_kernel";
}
std::string name = _components.empty() ? "" : _components[0]->template_writer()->get_name();
- for(size_t i = 1; i < _components.size(); ++i)
+ for (size_t i = 1; i < _components.size(); ++i)
{
name += "___";
name += _components[i]->template_writer()->get_name();
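The replace_tags hunks above are whitespace-only, but the {{tag}} scanner they touch is the heart of the template writer. A self-contained sketch of the same substitution idea (the behavior on a lookup miss, keeping the tag verbatim, is an assumption rather than something this patch shows):

    #include <map>
    #include <string>

    // Sketch: replace every {{key}} in 'tmpl' with lut.at(key), mirroring
    // the two-state scan in ClTemplateWriter::replace_tags.
    std::string replace_tags_sketch(const std::string &tmpl, const std::map<std::string, std::string> &lut)
    {
        std::string out;
        for (size_t i = 0; i < tmpl.size(); ++i)
        {
            if (i + 1 < tmpl.size() && tmpl[i] == '{' && tmpl[i + 1] == '{')
            {
                const size_t end = tmpl.find("}}", i + 2);
                if (end != std::string::npos)
                {
                    const std::string key = tmpl.substr(i + 2, end - (i + 2));
                    const auto    it  = lut.find(key);
                    out += (it != lut.end()) ? it->second : "{{" + key + "}}";
                    i = end + 1; // resume after the closing braces
                    continue;
                }
            }
            out += tmpl[i];
        }
        return out;
    }

For example, with lut["src"] = "t0" and lut["dst"] = "t1", the template "LOAD({{src}}, {{dst}})" expands to "LOAD(t0, t1)".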
diff --git a/src/dynamic_fusion/sketch/utils/DependencyGraph.h b/src/dynamic_fusion/sketch/utils/DependencyGraph.h
index c891e76d8b..c157c2b21c 100644
--- a/src/dynamic_fusion/sketch/utils/DependencyGraph.h
+++ b/src/dynamic_fusion/sketch/utils/DependencyGraph.h
@@ -25,6 +25,7 @@
#define SRC_DYNAMIC_FUSION_SKETCH_UTILS_DEPENDENCYGRAPH
#include "arm_compute/core/Error.h"
+
#include <cstdint>
#include <map>
#include <set>
@@ -68,12 +69,10 @@ public:
OperatorId op{};
std::vector<TensorId> inputs{};
std::vector<TensorId> outputs{};
- friend bool operator==(const OpPack &opp0, const OpPack &opp1)
+ friend bool operator==(const OpPack &opp0, const OpPack &opp1)
{
- return std::make_tuple(
- opp0.op, opp0.inputs, opp0.outputs)
- == std::make_tuple(
- opp1.op, opp1.inputs, opp1.outputs);
+ return std::make_tuple(opp0.op, opp0.inputs, opp0.outputs) ==
+ std::make_tuple(opp1.op, opp1.inputs, opp1.outputs);
}
};
@@ -95,10 +94,13 @@ public:
* @return true If the operator can be added while keeping the graph as a linear sequence
* @return false Otherwise
*/
- bool try_add_operator_as_linear(OperatorId op, const std::vector<TensorId> &inputs, const std::vector<TensorId> &outputs, bool is_output = false) const
+ bool try_add_operator_as_linear(OperatorId op,
+ const std::vector<TensorId> &inputs,
+ const std::vector<TensorId> &outputs,
+ bool is_output = false) const
{
ARM_COMPUTE_UNUSED(op, is_output);
- if(all_ops().empty())
+ if (all_ops().empty())
{
return true;
}
@@ -106,25 +108,25 @@ public:
// If the new operator is not the first operator, at least one input tensor must be
// the output tensor of the last non-output operator. All other input tensors must be
// the global input of the graph (i.e. not the output of any operator).
- if(_last_op_available)
+ if (_last_op_available)
{
auto use_input_from_last_op = false;
- for(auto src_tensor : inputs)
+ for (auto src_tensor : inputs)
{
const auto src_ops = _adj_src_ops.find(src_tensor);
- if(src_ops != _adj_src_ops.end())
+ if (src_ops != _adj_src_ops.end())
{
ARM_COMPUTE_ERROR_ON(src_ops->second.size() > 1);
- if(!src_ops->second.empty())
+ if (!src_ops->second.empty())
{
const auto src_op = src_ops->second[0];
- if(src_op == _last_op)
+ if (src_op == _last_op)
{
- if(use_input_from_last_op)
+ if (use_input_from_last_op)
{
// To be safe, we also forbid using the output tensor
// of the last operator twice.
@@ -143,7 +145,7 @@ public:
}
}
- if(!use_input_from_last_op)
+ if (!use_input_from_last_op)
{
// At least one input tensor must be the output tensor of the last non-output operator.
return false;
@@ -152,9 +154,9 @@ public:
// The output tensor of the new operator must not be the input tensor of any previously
// added operator.
- for(auto dst_tensor : outputs)
+ for (auto dst_tensor : outputs)
{
- if(_adj_dst_ops.find(dst_tensor) != _adj_dst_ops.end())
+ if (_adj_dst_ops.find(dst_tensor) != _adj_dst_ops.end())
{
return false;
}
@@ -168,7 +170,10 @@ public:
* INVARIANT: The list can only grow from head to tail
* INVARIANT: POSTCONDITION: The graph is linear
*/
- void add_operator_as_linear(OperatorId op, const std::vector<TensorId> &inputs, const std::vector<TensorId> &outputs, bool is_output = false)
+ void add_operator_as_linear(OperatorId op,
+ const std::vector<TensorId> &inputs,
+ const std::vector<TensorId> &outputs,
+ bool is_output = false)
{
const auto success = add_operator(op, inputs, outputs, is_output);
ARM_COMPUTE_UNUSED(success);
@@ -183,24 +188,27 @@ public:
* @param[in] outputs Output tensors to the operator
* @param[in] is_output Whether this is an output operator
*/
- bool add_operator(OperatorId op, const std::vector<TensorId> &inputs, const std::vector<TensorId> &outputs, bool is_output = false)
+ bool add_operator(OperatorId op,
+ const std::vector<TensorId> &inputs,
+ const std::vector<TensorId> &outputs,
+ bool is_output = false)
{
- if(operator_exists(op))
+ if (operator_exists(op))
{
return false;
}
_adj_src_tensors[op] = {};
_adj_dst_tensors[op] = {};
- for(auto in_tensor : inputs)
+ for (auto in_tensor : inputs)
{
// Linking input tensor to operator node will never create a cycle / loop because we guarantee
// each op is newly created, so every <input, op> pair / edge is new
link_input(op, in_tensor);
}
- for(auto out_tensor : outputs)
+ for (auto out_tensor : outputs)
{
// If there exists a back path from op's output tensor to op already, then linking the two will create a loop / cycle
- if(path_exists_from_tensor_to_op(out_tensor, op))
+ if (path_exists_from_tensor_to_op(out_tensor, op))
{
remove_operator(op);
return false;
@@ -211,10 +219,10 @@ public:
}
}
- if(!is_output)
+ if (!is_output)
{
_last_op_available = true;
- _last_op = op;
+ _last_op = op;
}
return true;
@@ -230,16 +238,16 @@ public:
std::vector<OpPack> build_operators_sequence() const
{
std::vector<OpPack> ops_seq;
- std::set<Id> done_ops;
- std::set<Id> done_tensors;
+ std::set<Id> done_ops;
+ std::set<Id> done_tensors;
const auto input_tensors = global_src_tensors();
- for(auto tensor : input_tensors)
+ for (auto tensor : input_tensors)
{
done_tensors.insert(tensor);
- for(auto op : _adj_dst_ops.at(tensor))
+ for (auto op : _adj_dst_ops.at(tensor))
{
build_operators_sequence_from_op(op, ops_seq, done_ops, done_tensors);
}
@@ -260,10 +268,8 @@ public:
friend bool operator==(const DependencyGraph &g0, const DependencyGraph &g1)
{
// Do not compare id allocators
- return std::make_tuple(
- g0._adj_src_tensors, g0._adj_dst_tensors, g0._adj_src_ops, g0._adj_dst_ops)
- == std::make_tuple(
- g1._adj_src_tensors, g1._adj_dst_tensors, g1._adj_src_ops, g1._adj_dst_ops);
+ return std::make_tuple(g0._adj_src_tensors, g0._adj_dst_tensors, g0._adj_src_ops, g0._adj_dst_ops) ==
+ std::make_tuple(g1._adj_src_tensors, g1._adj_dst_tensors, g1._adj_src_ops, g1._adj_dst_ops);
}
std::vector<OperatorId> src_ops_from_tensor(TensorId tensor) const
{
@@ -280,10 +286,8 @@ public:
std::vector<TensorId> all_tensors() const
{
std::vector<TensorId> tensors{};
- std::transform(std::begin(_adj_src_ops), std::end(_adj_src_ops), std::back_inserter(tensors), [](const auto & it)
- {
- return it.first;
- });
+ std::transform(std::begin(_adj_src_ops), std::end(_adj_src_ops), std::back_inserter(tensors),
+ [](const auto &it) { return it.first; });
return tensors;
}
/** Get source tensors of the whole graph
@@ -293,9 +297,9 @@ public:
std::vector<TensorId> global_src_tensors() const
{
std::vector<TensorId> tensors;
- for(auto tensor_src_ops : _adj_src_ops)
+ for (auto tensor_src_ops : _adj_src_ops)
{
- if(tensor_src_ops.second.empty())
+ if (tensor_src_ops.second.empty())
{
tensors.push_back(tensor_src_ops.first);
}
@@ -309,9 +313,9 @@ public:
std::vector<TensorId> global_dst_tensors() const
{
std::vector<TensorId> tensors;
- for(auto tensor_dst_ops : _adj_dst_ops)
+ for (auto tensor_dst_ops : _adj_dst_ops)
{
- if(tensor_dst_ops.second.empty())
+ if (tensor_dst_ops.second.empty())
{
tensors.push_back(tensor_dst_ops.first);
}
@@ -328,14 +332,14 @@ public:
// If a tensor is used to connect the input of an operator and the output of another operator,
// it is not allocated in the memory. The tensor exists as a temporary variable only.
- for(auto src_tensor : _adj_src_ops)
+ for (auto src_tensor : _adj_src_ops)
{
- if(!src_tensor.second.empty())
+ if (!src_tensor.second.empty())
{
const auto dst_tensor = _adj_dst_ops.find(src_tensor.first);
- if(dst_tensor != _adj_dst_ops.end())
+ if (dst_tensor != _adj_dst_ops.end())
{
- if(!dst_tensor->second.empty())
+ if (!dst_tensor->second.empty())
{
tensors.push_back(src_tensor.first);
}
@@ -354,9 +358,9 @@ public:
std::vector<OperatorId> ops{};
const auto op_list = all_ops();
- for(auto op : op_list)
+ for (auto op : op_list)
{
- if(src_ops(op).empty())
+ if (src_ops(op).empty())
{
ops.emplace_back(op);
}
@@ -368,7 +372,7 @@ private:
void link_input(OperatorId op, TensorId in_tensor)
{
ARM_COMPUTE_ERROR_ON(!operator_exists(op));
- if(!tensor_exists(in_tensor))
+ if (!tensor_exists(in_tensor))
{
insert_new_tensor(in_tensor);
}
@@ -379,7 +383,7 @@ private:
void link_output(OperatorId op, TensorId out_tensor)
{
ARM_COMPUTE_ERROR_ON(!operator_exists(op));
- if(!tensor_exists(out_tensor))
+ if (!tensor_exists(out_tensor))
{
insert_new_tensor(out_tensor);
}
@@ -392,7 +396,7 @@ private:
{
ARM_COMPUTE_ERROR_ON(!operator_exists(op));
std::vector<OperatorId> ops{};
- for(TensorId src_tensor : src_tensors(op))
+ for (TensorId src_tensor : src_tensors(op))
{
ops.insert(ops.end(), std::begin(_adj_src_ops.at(src_tensor)), std::end(_adj_src_ops.at(src_tensor)));
}
@@ -402,7 +406,7 @@ private:
{
ARM_COMPUTE_ERROR_ON(!operator_exists(op));
std::vector<OperatorId> ops{};
- for(TensorId dst_tensor : _adj_dst_tensors.at(op))
+ for (TensorId dst_tensor : _adj_dst_tensors.at(op))
{
ops.insert(ops.end(), std::begin(_adj_dst_ops.at(dst_tensor)), std::end(_adj_dst_ops.at(dst_tensor)));
}
@@ -436,10 +440,8 @@ private:
std::vector<OperatorId> all_ops() const
{
std::vector<OperatorId> ops{};
- std::transform(std::begin(_adj_src_tensors), std::end(_adj_src_tensors), std::back_inserter(ops), [](const auto & it)
- {
- return it.first;
- });
+ std::transform(std::begin(_adj_src_tensors), std::end(_adj_src_tensors), std::back_inserter(ops),
+ [](const auto &it) { return it.first; });
return ops;
}
/** Remove an operator from graph.
@@ -448,25 +450,21 @@ private:
*/
void remove_operator(OperatorId op)
{
- for(auto src_tensor : _adj_src_tensors.at(op))
+ for (auto src_tensor : _adj_src_tensors.at(op))
{
auto &dst_ops = _adj_dst_ops.at(src_tensor);
- dst_ops.erase(
- std::remove(std::begin(dst_ops), std::end(dst_ops), op),
- std::end(dst_ops));
+ dst_ops.erase(std::remove(std::begin(dst_ops), std::end(dst_ops), op), std::end(dst_ops));
}
- for(auto dst_tensor : _adj_dst_tensors.at(op))
+ for (auto dst_tensor : _adj_dst_tensors.at(op))
{
auto &src_ops = _adj_src_ops.at(dst_tensor);
- src_ops.erase(
- std::remove(std::begin(src_ops), std::end(src_ops), op),
- std::end(src_ops));
+ src_ops.erase(std::remove(std::begin(src_ops), std::end(src_ops), op), std::end(src_ops));
}
// Remove any isolated tensors
// An isolated tensor is one where both its _adj_src_ops and _adj_dst_ops are empty
- for(auto t : all_tensors())
+ for (auto t : all_tensors())
{
- if(_adj_src_ops.at(t).empty() && _adj_dst_ops.at(t).empty())
+ if (_adj_src_ops.at(t).empty() && _adj_dst_ops.at(t).empty())
{
_adj_src_ops.erase(t);
_adj_dst_ops.erase(t);
@@ -486,11 +484,12 @@ private:
}
bool operator_exists(OperatorId op) const
{
- return _adj_src_tensors.find(op) != _adj_src_tensors.end() && _adj_dst_tensors.find(op) != _adj_dst_tensors.end();
+ return _adj_src_tensors.find(op) != _adj_src_tensors.end() &&
+ _adj_dst_tensors.find(op) != _adj_dst_tensors.end();
}
bool is_src_tensor_of(OperatorId op, TensorId tensor) const
{
- if(!operator_exists(op) || !tensor_exists(tensor))
+ if (!operator_exists(op) || !tensor_exists(tensor))
{
return false;
}
@@ -499,7 +498,7 @@ private:
}
bool is_dst_tensor_of(OperatorId op, TensorId tensor) const
{
- if(!operator_exists(op) || !tensor_exists(tensor))
+ if (!operator_exists(op) || !tensor_exists(tensor))
{
return false;
}
@@ -525,9 +524,9 @@ private:
std::vector<OperatorId> ops{};
const auto op_list = all_ops();
- for(auto op : op_list)
+ for (auto op : op_list)
{
- if(is_dst_op(op))
+ if (is_dst_op(op))
{
ops.emplace_back(op);
}
@@ -536,13 +535,13 @@ private:
}
bool path_exists_from_tensor_to_op(TensorId src_tensor, OperatorId dst_op) const
{
- if(!tensor_exists(src_tensor) || !operator_exists(dst_op))
+ if (!tensor_exists(src_tensor) || !operator_exists(dst_op))
{
return false;
}
- for(auto child_op : dst_ops_from_tensor(src_tensor))
+ for (auto child_op : dst_ops_from_tensor(src_tensor))
{
- if(path_exists_from_op_to_op(child_op, dst_op))
+ if (path_exists_from_op_to_op(child_op, dst_op))
{
return true;
}
@@ -552,21 +551,21 @@ private:
bool path_exists_from_op_to_op(OperatorId src_op, OperatorId dst_op) const
{
- if(!operator_exists(src_op) || !operator_exists(dst_op))
+ if (!operator_exists(src_op) || !operator_exists(dst_op))
{
return false;
}
- if(src_op == dst_op)
+ if (src_op == dst_op)
{
return true;
}
- if(is_in(src_op, get_dst_ops()))
+ if (is_in(src_op, get_dst_ops()))
{
return false;
}
- for(auto child_tensor : dst_tensors(src_op))
+ for (auto child_tensor : dst_tensors(src_op))
{
- if(path_exists_from_tensor_to_op(child_tensor, dst_op))
+ if (path_exists_from_tensor_to_op(child_tensor, dst_op))
{
return true;
}
@@ -574,16 +573,15 @@ private:
return false;
}
- void build_operators_sequence_from_op(
- Id op,
- std::vector<OpPack> &ops_seq,
- std::set<Id> &done_ops,
- std::set<Id> &done_tensors) const
+ void build_operators_sequence_from_op(Id op,
+ std::vector<OpPack> &ops_seq,
+ std::set<Id> &done_ops,
+ std::set<Id> &done_tensors) const
{
- while(true)
+ while (true)
{
// If the operator has been added to the sequence, ignore it.
- if(done_ops.find(op) != done_ops.end())
+ if (done_ops.find(op) != done_ops.end())
{
return;
}
@@ -593,9 +591,9 @@ private:
// is added to the sequence.
const auto src_tensors = _adj_src_tensors.at(op);
- for(auto src : src_tensors)
+ for (auto src : src_tensors)
{
- if(done_tensors.find(src) == done_tensors.end())
+ if (done_tensors.find(src) == done_tensors.end())
{
return;
}
@@ -606,24 +604,24 @@ private:
done_ops.insert(op);
- OpPack pack{ op, src_tensors, dst_tensors };
+ OpPack pack{op, src_tensors, dst_tensors};
ops_seq.push_back(pack);
done_tensors.insert(dst_tensors.begin(), dst_tensors.end());
// Visit all the sink operators.
// Call this function recursively unless there is only one sink.
- if(dst_tensors.size() == 1 && _adj_dst_ops.at(dst_tensors[0]).size() == 1)
+ if (dst_tensors.size() == 1 && _adj_dst_ops.at(dst_tensors[0]).size() == 1)
{
op = _adj_dst_ops.at(dst_tensors[0])[0];
}
else
{
- for(auto dst_tensor : dst_tensors)
+ for (auto dst_tensor : dst_tensors)
{
const auto dst_ops = _adj_dst_ops.at(dst_tensor);
- for(auto dst_op : dst_ops)
+ for (auto dst_op : dst_ops)
{
build_operators_sequence_from_op(dst_op, ops_seq, done_ops, done_tensors);
}
@@ -640,8 +638,8 @@ private:
AdjList _adj_src_ops{};
AdjList _adj_dst_ops{};
- bool _last_op_available{ false };
- OperatorId _last_op{ 0 };
+ bool _last_op_available{false};
+ OperatorId _last_op{0};
};
} // namespace dynamic_fusion
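Most DependencyGraph changes above are brace placement, but the invariants being reformatted are easy to lose in the noise: add_operator refuses an edge that would close a cycle, and try_add_operator_as_linear additionally requires the new operator to consume the tail of the current chain. A hedged usage sketch against the API shown in the hunks (the concrete ids and default construction are assumptions):

    using namespace arm_compute::experimental::dynamic_fusion;

    void dependency_graph_sketch()
    {
        DependencyGraph g{};
        g.add_operator(0, /* inputs */ {10}, /* outputs */ {11}); // t10 -> op0 -> t11
        g.add_operator(1, {11}, {12});                            // consumes op0's output: chain stays linear
        const bool linear = g.try_add_operator_as_linear(2, {12}, {13}); // true: op2 would extend the tail
        const bool cyclic = g.add_operator(3, {12}, {10});        // false: t10 already feeds op0
        ARM_COMPUTE_UNUSED(linear, cyclic);
    }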
diff --git a/src/dynamic_fusion/utils/Utils.h b/src/dynamic_fusion/utils/Utils.h
index c9fc2c610f..3f4a2edd03 100644
--- a/src/dynamic_fusion/utils/Utils.h
+++ b/src/dynamic_fusion/utils/Utils.h
@@ -63,17 +63,21 @@ inline bool is_invalid_tensor(const ITensorInfo *tensor_info)
/** Inline function to convert @ref Pool2dAttributes to PoolingLayerInfo
*/
-inline PoolingLayerInfo convert_pool_attr_to_pool_info(const Pool2dAttributes &pool_attr, bool mixed_precision = false, DataLayout data_layout = DataLayout::NHWC)
+inline PoolingLayerInfo convert_pool_attr_to_pool_info(const Pool2dAttributes &pool_attr,
+ bool mixed_precision = false,
+ DataLayout data_layout = DataLayout::NHWC)
{
// Create PadStrideInfo
const Size2D stride = pool_attr.stride();
const Padding2D padding = pool_attr.pad();
- const PadStrideInfo pad_stride(stride.x(), stride.y(), padding.left, padding.top, arm_compute::DimensionRoundingType::FLOOR);
+ const PadStrideInfo pad_stride(stride.x(), stride.y(), padding.left, padding.top,
+ arm_compute::DimensionRoundingType::FLOOR);
- return PoolingLayerInfo(pool_attr.pool_type(), pool_attr.pool_size(), data_layout, pad_stride, pool_attr.exclude_padding(), mixed_precision);
-}
-}
-}
+ return PoolingLayerInfo(pool_attr.pool_type(), pool_attr.pool_size(), data_layout, pad_stride,
+ pool_attr.exclude_padding(), mixed_precision);
}
+} // namespace dynamic_fusion
+} // namespace experimental
+} // namespace arm_compute
#endif /* SRC_DYNAMIC_FUSION_UTILS_UTILS */
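One closing note on the Utils.h hunk: after the reflow, convert_pool_attr_to_pool_info still builds the identical PadStrideInfo, strides first, then left/top padding, always with FLOOR rounding. Spelled out with hypothetical concrete values:

    // The same construction as inside convert_pool_attr_to_pool_info,
    // with illustrative numbers: 2x2 stride and 1-pixel left/top padding.
    const arm_compute::PadStrideInfo pad_stride(
        /* stride_x */ 2, /* stride_y */ 2,
        /* pad_x */ 1, /* pad_y */ 1,
        arm_compute::DimensionRoundingType::FLOOR);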