aboutsummaryrefslogtreecommitdiff
path: root/src/dynamic_fusion/sketch/gpu/ckw_driver
diff options
context:
space:
mode:
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu/ckw_driver')
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp7
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h6
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp21
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h4
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.cpp12
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp1
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h2
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp20
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h8
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h10
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp34
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.h10
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp44
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h10
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.cpp49
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp84
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h14
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.cpp171
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.h8
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwResize.cpp76
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp10
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h6
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h31
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h35
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/ElementwiseBinary.h3
25 files changed, 373 insertions, 303 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp
index 4b4c22fa1d..c4ab110c92 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.cpp
@@ -23,6 +23,7 @@
*/
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h"
+
#include "ckw/Error.h"
namespace arm_compute
@@ -36,12 +37,12 @@ GpuCkwComponentArgument::GpuCkwComponentArgument()
{
}
-GpuCkwComponentArgument::GpuCkwComponentArgument(ckw::TensorOperand &tensor)
- : _tensor(&tensor)
+GpuCkwComponentArgument::GpuCkwComponentArgument(ckw::TensorOperand &tensor) : _tensor(&tensor)
{
}
-GpuCkwComponentArgument &GpuCkwComponentArgument::init_virtual_tensor(ckw::TileOperand &tile, const ckw::TensorTileSampler &tile_sampler)
+GpuCkwComponentArgument &GpuCkwComponentArgument::init_virtual_tensor(ckw::TileOperand &tile,
+ const ckw::TensorTileSampler &tile_sampler)
{
CKW_ASSERT(_tile == nullptr);
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h
index 80f91389a0..863989a7bd 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h
@@ -110,9 +110,9 @@ public:
const ckw::TensorTileSampler &tile_sampler() const;
private:
- ckw::TensorOperand *_tensor{ nullptr };
- ckw::TileOperand *_tile{ nullptr };
- ckw::TensorTileSampler _tile_sampler{};
+ ckw::TensorOperand *_tensor{nullptr};
+ ckw::TileOperand *_tile{nullptr};
+ ckw::TensorTileSampler _tile_sampler{};
};
} // namespace dynamic_fusion
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp
index a24a172d77..c927f32bde 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp
@@ -23,17 +23,16 @@
*/
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h"
-#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
-
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Window.h"
+
#include "src/common/utils/Log.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
-
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h"
+#include "src/dynamic_fusion/sketch/gpu/components/IGpuKernelComponent.h"
using namespace ckw;
namespace arm_compute
@@ -43,11 +42,11 @@ namespace experimental
namespace dynamic_fusion
{
GpuCkwDriver::GpuCkwDriver(const GpuKernelComponentGroup &components)
- : _components{ components }, _kernel{ GpuTargetLanguage::OpenCL }, _code{}
+ : _components{components}, _kernel{GpuTargetLanguage::OpenCL}, _code{}
{
// Generate kernel name
std::string name = "";
- for(auto &comp : _components)
+ for (auto &comp : _components)
{
auto ckw_driver = comp->ckw_component_driver();
ARM_COMPUTE_ERROR_ON(ckw_driver == nullptr);
@@ -60,7 +59,7 @@ GpuCkwDriver::GpuCkwDriver(const GpuKernelComponentGroup &components)
GpuCkwScopedKernelWriter writer(&root_writer);
GpuCkwVariableTable vtable{};
- for(auto &comp : _components)
+ for (auto &comp : _components)
{
auto ckw_driver = comp->ckw_component_driver();
ARM_COMPUTE_ERROR_ON(ckw_driver == nullptr);
@@ -82,7 +81,7 @@ std::string GpuCkwDriver::get_code()
std::string GpuCkwDriver::get_config_id()
{
std::string id = "";
- for(auto &comp : _components)
+ for (auto &comp : _components)
{
auto ckw_driver = comp->ckw_component_driver();
ARM_COMPUTE_ERROR_ON(ckw_driver == nullptr);
@@ -101,9 +100,9 @@ Window GpuCkwDriver::get_window() const
GpuKernelArgumentList GpuCkwDriver::get_kernel_arguments()
{
GpuKernelArgumentList args{};
- for(const auto &arg : _kernel.arguments())
+ for (const auto &arg : _kernel.arguments())
{
- switch(arg.type())
+ switch (arg.type())
{
case KernelArgument::Type::TensorStorage:
{
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h
index 19db575fea..2ca5fb435c 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h
@@ -24,12 +24,12 @@
#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWDRIVER
#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWDRIVER
+#include "ckw/Kernel.h"
+
#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h"
-#include "ckw/Kernel.h"
-
#include <map>
#include <string>
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.cpp
index ca4f121566..5f8ce919e3 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.cpp
@@ -23,10 +23,12 @@
*/
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h"
+
#include "ckw/Error.h"
#include "ckw/TileInfo.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h"
+
namespace arm_compute
{
namespace experimental
@@ -34,21 +36,21 @@ namespace experimental
namespace dynamic_fusion
{
-GpuCkwKernelWriter::GpuCkwKernelWriter(ckw::Kernel &kernel)
- : KernelWriter(kernel)
+GpuCkwKernelWriter::GpuCkwKernelWriter(ckw::Kernel &kernel) : KernelWriter(kernel)
{
}
void GpuCkwKernelWriter::op_load_once(GpuCkwComponentArgument *tensor_or_tile, const ckw::TensorTileSampler &sampler)
{
- if(!tensor_or_tile->has_tile())
+ if (!tensor_or_tile->has_tile())
{
CKW_ASSERT(tensor_or_tile->has_tensor());
auto &tensor = tensor_or_tile->tensor();
const auto tile_name = tensor.name() + "_tile";
- auto &tile = declare_tile(tile_name.c_str(), ckw::TileInfo(tensor.data_type(), sampler.height(), sampler.width()));
+ auto &tile =
+ declare_tile(tile_name.c_str(), ckw::TileInfo(tensor.data_type(), sampler.height(), sampler.width()));
op_load(tile, tensor, sampler);
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp
index 043fda9e6f..cbadbd9639 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.cpp
@@ -23,6 +23,7 @@
*/
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
+
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
namespace arm_compute
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h
index 4d11b5e3e4..81049bfe37 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h
@@ -63,7 +63,7 @@ public:
private:
GpuCkwKernelWriter *_writer;
- int32_t _parent_id_space;
+ int32_t _parent_id_space;
};
} // namespace dynamic_fusion
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp
index 37c27cd116..88a0cf7f43 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp
@@ -23,11 +23,12 @@
*/
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+
#include <sstream>
namespace arm_compute
@@ -36,19 +37,22 @@ namespace experimental
{
namespace dynamic_fusion
{
-GpuCkwComponentArgument *GpuCkwVariableTable::declare_variable(const GpuKernelComponentGroup &comp_group, GpuCkwScopedKernelWriter &writer, const ITensorInfo *tensor, TensorStorageType storage,
- const std::string &alias)
+GpuCkwComponentArgument *GpuCkwVariableTable::declare_variable(const GpuKernelComponentGroup &comp_group,
+ GpuCkwScopedKernelWriter &writer,
+ const ITensorInfo *tensor,
+ TensorStorageType storage,
+ const std::string &alias)
{
ARM_COMPUTE_ERROR_ON_MSG(!tensor->has_valid_id(), "Tensor info with valid id expected");
// Do not re-declare if the variable associated with the tensor has already been declared
auto it = _vars.find(tensor->id());
- if(it != _vars.end())
+ if (it != _vars.end())
{
return &it->second;
}
- if(comp_group.is_intermediate_tensor(tensor))
+ if (comp_group.is_intermediate_tensor(tensor))
{
// Create a virtual tensor variable
GpuCkwComponentArgument var;
@@ -61,7 +65,7 @@ GpuCkwComponentArgument *GpuCkwVariableTable::declare_variable(const GpuKernelCo
std::stringstream ss;
ss << alias << "_t" << abs(tensor->id());
const auto uniq_name = ss.str();
- GpuCkwComponentArgument var{ writer->declare_tensor_argument(uniq_name, to_ckw(*tensor), to_ckw(storage)) };
+ GpuCkwComponentArgument var{writer->declare_tensor_argument(uniq_name, to_ckw(*tensor), to_ckw(storage))};
auto &&inserted = _vars.emplace(tensor->id(), var);
return &(inserted.first->second);
}
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h
index 0649dcba9d..2b118911b8 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h
@@ -25,6 +25,7 @@
#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWVARIABLETABLE
#include "arm_compute/core/ITensorInfo.h"
+
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h"
#include <map>
@@ -58,8 +59,11 @@ public:
*
* @return GpuCkwComponentArgument*
*/
- GpuCkwComponentArgument *declare_variable(const GpuKernelComponentGroup &comp_group, GpuCkwScopedKernelWriter &writer, const ITensorInfo *tensor, TensorStorageType storage,
- const std::string &alias = "unnamed");
+ GpuCkwComponentArgument *declare_variable(const GpuKernelComponentGroup &comp_group,
+ GpuCkwScopedKernelWriter &writer,
+ const ITensorInfo *tensor,
+ TensorStorageType storage,
+ const std::string &alias = "unnamed");
private:
std::map<ITensorInfo::Id, GpuCkwComponentArgument> _vars{};
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h
index 14086f785e..52e56e2e35 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/IGpuCkwComponentDriver.h
@@ -25,6 +25,7 @@
#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_IGPUCKWCOMPONENTDRIVER
#include "arm_compute/core/Window.h"
+
#include "src/dynamic_fusion/sketch/ArgumentPack.h"
#include "src/dynamic_fusion/sketch/gpu/components/Types.h"
@@ -73,8 +74,7 @@ public:
* @param[in] id Component id
* @param[in] tensors Tensor arguments to the components
*/
- IGpuCkwComponentDriver(ComponentId id, const ArgumentPack<ITensorInfo> &tensors)
- : _id{ id }, _tensors{ tensors }
+ IGpuCkwComponentDriver(ComponentId id, const ArgumentPack<ITensorInfo> &tensors) : _id{id}, _tensors{tensors}
{
}
/** Destructor */
@@ -89,7 +89,9 @@ public:
*
* @note @p writer can only be passed via value since the new scope is created in the copy constructor
*/
- virtual void write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const = 0;
+ virtual void write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const = 0;
/** Get tensor arguments */
ArgumentPack<ITensorInfo> tensors() const
{
@@ -128,7 +130,7 @@ public:
}
private:
- ComponentId _id{ -1 };
+ ComponentId _id{-1};
ArgumentPack<ITensorInfo> _tensors{};
};
} // namespace dynamic_fusion
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp
index c07fac0e0d..c3b1b3c8bc 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp
@@ -24,16 +24,18 @@
#include "GpuCkwActivation.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
+#include "arm_compute/core/Validate.h"
#include "ckw/TensorTileSampler.h"
+
#include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+
#include <string>
using namespace ckw;
@@ -87,24 +89,25 @@ inline TensorTileSampler create_sampler(GpuCkwScopedKernelWriter &writer, int32_
GpuCkwActivation::GpuCkwActivation(ComponentId id,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes)
- : IGpuCkwComponentDriver{ id, tensors },
- _src{},
- _dst{},
- _attributes{ attributes }
+ : IGpuCkwComponentDriver{id, tensors}, _src{}, _dst{}, _attributes{attributes}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
}
-void GpuCkwActivation::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
+void GpuCkwActivation::write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const
{
const auto root_window = comp_group.get_root_component()->ckw_component_driver()->get_window();
const unsigned int n0 = root_window.x().step();
const unsigned int m0 = root_window.y().step();
- GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
- GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
+ GpuCkwComponentArgument *src =
+ vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
+ GpuCkwComponentArgument *dst =
+ vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
load_src_dst_tiles_and_prepare_sampler(writer, src, dst, m0, n0, create_sampler);
@@ -119,7 +122,7 @@ void GpuCkwActivation::write_component_code(const ComponentGroup &comp_group, Gp
const auto &constant_B = writer->declare_tile("B_VAL", _attributes.b());
// Perform the operation.
- switch(_attributes.activation())
+ switch (_attributes.activation())
{
case ActivationLayerInfo::ActivationFunction::LOGISTIC:
{
@@ -179,9 +182,10 @@ Window GpuCkwActivation::get_window() const
// Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged
// This is in line with the collapsing convention used by operators like Conv2d
output_shape.collapse(2U, 1U);
- constexpr unsigned int vector_size_byte_opencl = 16;
- const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
- Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
+ constexpr unsigned int vector_size_byte_opencl = 16;
+ const unsigned int num_elems_processed_per_iteration =
+ adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
+ Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
return win;
}
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.h
index e157e36cbf..386e933a72 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.h
@@ -46,15 +46,15 @@ public:
* @param[in] tensors Tensor arguments to the component
* @param[in] attributes Component attributes
*/
- GpuCkwActivation(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes);
+ GpuCkwActivation(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(GpuCkwActivation);
/** Destructor */
~GpuCkwActivation() override = default;
// Inherited methods overriden:
- virtual void write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const override;
- Window get_window() const override;
+ virtual void write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const override;
+ Window get_window() const override;
private:
const ITensorInfo *_src;
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp
index 6ecf2bac44..e8e5087633 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp
@@ -24,16 +24,18 @@
#include "GpuCkwCast.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
+#include "arm_compute/core/Validate.h"
#include "ckw/TensorTileSampler.h"
+
#include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+
#include <string>
using namespace ckw;
@@ -84,30 +86,29 @@ inline TensorTileSampler create_sampler(GpuCkwScopedKernelWriter &writer, int32_
}
} // namespace
-GpuCkwCast::GpuCkwCast(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
- : IGpuCkwComponentDriver{ id, tensors },
- _src{},
- _dst{},
- _attributes{ attributes }
+GpuCkwCast::GpuCkwCast(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes)
+ : IGpuCkwComponentDriver{id, tensors}, _src{}, _dst{}, _attributes{attributes}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
}
-void GpuCkwCast::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
+void GpuCkwCast::write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const
{
const auto root_window = comp_group.get_root_component()->ckw_component_driver()->get_window();
const unsigned int n0 = root_window.x().step();
const unsigned int m0 = root_window.y().step();
- GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
- GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
+ GpuCkwComponentArgument *src =
+ vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
+ GpuCkwComponentArgument *dst =
+ vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
// Load the source tile and prepare the sampler.
- if(!src->has_tile())
+ if (!src->has_tile())
{
const auto sampler = create_sampler(writer, m0, n0);
writer->op_load_once(src, sampler);
@@ -122,7 +123,7 @@ void GpuCkwCast::write_component_code(const ComponentGroup &comp_group, GpuCkwVa
const auto &sampler = src->tile_sampler();
// Prepare the output tile.
- if(!dst->has_tile())
+ if (!dst->has_tile())
{
// Get Target datatype and convert it to ckw::DataType.
ckw::DataType target_dt = dynamic_fusion::to_ckw(_attributes.data_type());
@@ -143,7 +144,7 @@ void GpuCkwCast::write_component_code(const ComponentGroup &comp_group, GpuCkwVa
const size_t dst_size = data_size_from_type(_dst->data_type());
const bool cast_down = (src_size >= dst_size);
- if(cast_down && is_data_type_quantized(_src->data_type()))
+ if (cast_down && is_data_type_quantized(_src->data_type()))
{
const auto &constant_x80 = writer->declare_tile("0x80", 0x80);
writer->op_binary_expression(src_tile, src_tile, BinaryOp::BitwiseXOR, constant_x80);
@@ -151,7 +152,7 @@ void GpuCkwCast::write_component_code(const ComponentGroup &comp_group, GpuCkwVa
ckw::ConvertPolicy convert_policy = ckw::ConvertPolicy::None;
- if(cast_down && (is_data_type_float(_src->data_type()) || _attributes.convert_policy() == ConvertPolicy::SATURATE))
+ if (cast_down && (is_data_type_float(_src->data_type()) || _attributes.convert_policy() == ConvertPolicy::SATURATE))
{
convert_policy = ckw::ConvertPolicy::Saturate;
}
@@ -167,9 +168,10 @@ Window GpuCkwCast::get_window() const
// Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged
// This is in line with the collapsing convention used by operators like Conv2d
output_shape.collapse(2U, 1U);
- constexpr unsigned int vector_size_byte_opencl = 16;
- const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
- Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
+ constexpr unsigned int vector_size_byte_opencl = 16;
+ const unsigned int num_elems_processed_per_iteration =
+ adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
+ Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
return win;
}
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h
index 821cec1e19..2389301196 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.h
@@ -46,15 +46,15 @@ public:
* @param[in] tensors Tensor arguments to the component
* @param[in] attributes Component attributes
*/
- GpuCkwCast(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes);
+ GpuCkwCast(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(GpuCkwCast);
/** Destructor */
~GpuCkwCast() override = default;
// Inherited methods overriden:
- virtual void write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const override;
- Window get_window() const override;
+ virtual void write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const override;
+ Window get_window() const override;
private:
const ITensorInfo *_src;
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.cpp
index 3c906646a6..7833da2334 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.cpp
@@ -25,21 +25,20 @@
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwDirectConv2d.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "arm_compute/core/utils/StringUtils.h"
-
+#include "arm_compute/core/Validate.h"
#include "ckw/TensorTileSampler.h"
#include "ckw/TileInfo.h"
#include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
namespace arm_compute
{
@@ -54,13 +53,7 @@ GpuCkwDirectConv2d::GpuCkwDirectConv2d(ComponentId id,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes,
const Settings &settings)
- : IGpuCkwComponentDriver{ id, tensors },
- _src{},
- _wei{},
- _bia{},
- _dst{},
- _attributes{ attributes },
- _settings{ settings }
+ : IGpuCkwComponentDriver{id, tensors}, _src{}, _wei{}, _bia{}, _dst{}, _attributes{attributes}, _settings{settings}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_wei = this->tensors().get_const_tensor(TensorType::ACL_SRC_1);
@@ -69,7 +62,9 @@ GpuCkwDirectConv2d::GpuCkwDirectConv2d(ComponentId id,
ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _wei, _dst); // Bias can be null
}
-void GpuCkwDirectConv2d::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
+void GpuCkwDirectConv2d::write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const
{
const auto desc = _settings.direct_conv_descriptor();
ARM_COMPUTE_ERROR_ON_MSG(desc.export_input_to_cl_image || desc.export_output_to_cl_image,
@@ -99,15 +94,18 @@ void GpuCkwDirectConv2d::write_component_code(const ComponentGroup &comp_group,
// extra loop to compute the left-over elements.
const bool use_cl_image_for_weights = desc.export_weights_to_cl_image && (k0 == 4) && (K % 4 == 0);
- GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
+ GpuCkwComponentArgument *src =
+ vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
GpuCkwComponentArgument *wei = vtable.declare_variable(
- comp_group, writer, _wei, use_cl_image_for_weights ? TensorStorageType::ClImage2dReadOnly : TensorStorageType::ClBufferUint8Ptr, "wei");
- GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
+ comp_group, writer, _wei,
+ use_cl_image_for_weights ? TensorStorageType::ClImage2dReadOnly : TensorStorageType::ClBufferUint8Ptr, "wei");
+ GpuCkwComponentArgument *dst =
+ vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
GpuCkwComponentArgument *bia = nullptr;
const bool using_bias = _bia != nullptr;
- if(using_bias)
+ if (using_bias)
{
bia = vtable.declare_variable(comp_group, writer, _bia, TensorStorageType::ClBufferUint8Ptr, "bia");
}
@@ -154,7 +152,8 @@ void GpuCkwDirectConv2d::write_component_code(const ComponentGroup &comp_group,
src_sampler.address_mode_x(TensorSamplerAddressModeX::None);
// We cannot have out-of-bounds reads when the kernel height is equal to 1. Otherwise, we need to ensure the
// indirection buffer mi does not contain negative values representing out-of-bounds reads.
- src_sampler.address_mode_y(kernel_height == 1 ? TensorSamplerAddressModeY::None : TensorSamplerAddressModeY::SkipMinEdgeOnly);
+ src_sampler.address_mode_y(kernel_height == 1 ? TensorSamplerAddressModeY::None
+ : TensorSamplerAddressModeY::SkipMinEdgeOnly);
src_sampler.address_mode_z(TensorSamplerAddressModeZ::None);
TensorTileSampler wei_sampler;
@@ -178,7 +177,7 @@ void GpuCkwDirectConv2d::write_component_code(const ComponentGroup &comp_group,
dst_sampler.z(tile_0);
dst_sampler.b(tile_bout);
- if(!dst->has_tile())
+ if (!dst->has_tile())
{
auto &tile = writer->declare_tile("dst", TileInfo(to_ckw(_dst->data_type()), m0, n0));
dst->init_virtual_tensor(tile, dst_sampler);
@@ -189,10 +188,10 @@ void GpuCkwDirectConv2d::write_component_code(const ComponentGroup &comp_group,
// We create a 2d container of size (M0, 1) to store the indices for iteration
TileContainer it;
- for(int m = 0; m < m0; ++m)
+ for (int m = 0; m < m0; ++m)
{
- std::vector<std::string> idx { std::to_string(m) };
- it.push_back({ idx });
+ std::vector<std::string> idx{std::to_string(m)};
+ it.push_back({idx});
}
const auto &tile_it = writer->declare_tile("it", it, ckw::DataType::Int32);
@@ -289,9 +288,9 @@ void GpuCkwDirectConv2d::write_component_code(const ComponentGroup &comp_group,
// Bias addition
// NOTE: This operation will be removed from this kernel as the interface is standardized. The intended way of
// performing bias addition is to fuse this convolution kernel with a following elementwise addition kernel.
- if(using_bias)
+ if (using_bias)
{
- if(!bia->has_tile())
+ if (!bia->has_tile())
{
// Reuse the destination sampler for the bias
writer->op_load_once(bia, dst_sampler);
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp
index c8bf999261..2935ba45ea 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp
@@ -24,22 +24,24 @@
#include "GpuCkwElementwiseBinary.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Validate.h"
-#include "arm_compute/core/utils/StringUtils.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
+#include "arm_compute/core/utils/StringUtils.h"
+#include "arm_compute/core/Validate.h"
#include "ckw/TensorTileSampler.h"
#include "ckw/types/TensorSamplerTypes.h"
+
#include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/ElementwiseBinary.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/ElementwiseBinary.h"
#include "src/dynamic_fusion/sketch/gpu/components/utils/type_printer/ElementwiseBinary.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "support/StringSupport.h"
+
#include <algorithm>
#include <string>
@@ -53,11 +55,7 @@ namespace dynamic_fusion
GpuCkwElementwiseBinary::GpuCkwElementwiseBinary(ComponentId id,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes)
- : IGpuCkwComponentDriver{ id, tensors },
- _lhs{},
- _rhs{},
- _dst{},
- _attributes{ attributes }
+ : IGpuCkwComponentDriver{id, tensors}, _lhs{}, _rhs{}, _dst{}, _attributes{attributes}
{
_lhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_rhs = this->tensors().get_const_tensor(TensorType::ACL_SRC_1);
@@ -65,15 +63,20 @@ GpuCkwElementwiseBinary::GpuCkwElementwiseBinary(ComponentId
ARM_COMPUTE_ERROR_ON_NULLPTR(_lhs, _rhs, _dst);
}
-void GpuCkwElementwiseBinary::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
+void GpuCkwElementwiseBinary::write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const
{
const auto root_window = comp_group.get_root_component()->ckw_component_driver()->get_window();
const auto n0 = static_cast<int32_t>(root_window.x().step());
const auto m0 = static_cast<int32_t>(root_window.y().step());
- GpuCkwComponentArgument *lhs = vtable.declare_variable(comp_group, writer, _lhs, TensorStorageType::ClBufferUint8Ptr, "lhs");
- GpuCkwComponentArgument *rhs = vtable.declare_variable(comp_group, writer, _rhs, TensorStorageType::ClBufferUint8Ptr, "rhs");
- GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
+ GpuCkwComponentArgument *lhs =
+ vtable.declare_variable(comp_group, writer, _lhs, TensorStorageType::ClBufferUint8Ptr, "lhs");
+ GpuCkwComponentArgument *rhs =
+ vtable.declare_variable(comp_group, writer, _rhs, TensorStorageType::ClBufferUint8Ptr, "rhs");
+ GpuCkwComponentArgument *dst =
+ vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
auto &gid_0 = writer->declare_tile("gid_0", ckw::DataType::Int32);
auto &gid_1 = writer->declare_tile("gid_1", ckw::DataType::Int32);
@@ -86,32 +89,36 @@ void GpuCkwElementwiseBinary::write_component_code(const ComponentGroup &comp_gr
auto &const_0 = writer->declare_tile("0", 0);
// Load the LHS and RHS tiles
- if(!lhs->has_tile())
+ if (!lhs->has_tile())
{
- auto sampler = create_boundary_aware_2d_sampler(writer, gid_0, gid_1, _lhs->dimension(0), _lhs->dimension(1), n0, m0, "lhs_", const_0);
+ auto sampler = create_boundary_aware_2d_sampler(writer, gid_0, gid_1, _lhs->dimension(0), _lhs->dimension(1),
+ n0, m0, "lhs_", const_0);
sampler.format(TensorSamplerFormat::C_WH_1); // 3rd dimension collapsed with 2nd dimension
sampler.z(const_0);
sampler.b(gid_2);
writer->op_load_once(lhs, sampler);
}
- if(!rhs->has_tile())
+ if (!rhs->has_tile())
{
- auto sampler = create_boundary_aware_2d_sampler(writer, gid_0, gid_1, _rhs->dimension(0), _rhs->dimension(1), n0, m0, "rhs_", const_0);
+ auto sampler = create_boundary_aware_2d_sampler(writer, gid_0, gid_1, _rhs->dimension(0), _rhs->dimension(1),
+ n0, m0, "rhs_", const_0);
sampler.format(TensorSamplerFormat::C_WH_1); // 3rd dimension collapsed with 2nd dimension
sampler.z(const_0);
sampler.b(gid_2);
writer->op_load_once(rhs, sampler);
}
- auto dst_sampler = create_boundary_aware_2d_sampler(writer, gid_0, gid_1, _dst->dimension(0), _dst->dimension(1), n0, m0, "dst_", const_0);
+ auto dst_sampler = create_boundary_aware_2d_sampler(writer, gid_0, gid_1, _dst->dimension(0), _dst->dimension(1),
+ n0, m0, "dst_", const_0);
dst_sampler.format(TensorSamplerFormat::C_WH_1); // 3rd dimension collapsed with 2nd dimension
dst_sampler.z(const_0);
dst_sampler.b(gid_2);
// Prepare the output tile.
- if(!dst->has_tile())
+ if (!dst->has_tile())
{
- auto &tile = writer->declare_tile("dst_tile", ckw::TileInfo(to_ckw(_dst->data_type()), dst_sampler.height(), dst_sampler.width()));
+ auto &tile = writer->declare_tile(
+ "dst_tile", ckw::TileInfo(to_ckw(_dst->data_type()), dst_sampler.height(), dst_sampler.width()));
dst->init_virtual_tensor(tile, dst_sampler);
}
@@ -131,9 +138,10 @@ Window GpuCkwElementwiseBinary::get_window() const
// Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged
// This is in line with the collapsing convention used by operators like Conv2d
output_shape.collapse(2U, 1U);
- constexpr unsigned int vector_size_byte_opencl = 16;
- const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
- Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
+ constexpr unsigned int vector_size_byte_opencl = 16;
+ const unsigned int num_elems_processed_per_iteration =
+ adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
+ Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
return win;
}
@@ -141,11 +149,12 @@ Window GpuCkwElementwiseBinary::get_window() const
std::string GpuCkwElementwiseBinary::get_name(const ComponentGroup &comp_group) const
{
ARM_COMPUTE_UNUSED(comp_group);
- const std::vector<std::string> build_params =
- {
+ const std::vector<std::string> build_params = {
"elementwise_binary",
- "op", to_string(_attributes.operation()),
- "dt", lower_string(string_from_data_type(_dst->data_type())),
+ "op",
+ to_string(_attributes.operation()),
+ "dt",
+ lower_string(string_from_data_type(_dst->data_type())),
};
return join(build_params, "_");
}
@@ -154,13 +163,16 @@ std::string GpuCkwElementwiseBinary::get_tuner_id(const ComponentGroup &comp_gro
{
ARM_COMPUTE_UNUSED(comp_group);
/// NOTE: Hardcoded for now, the parameters should ideally be exported by ckw (a selection of constant tiles)
- std::vector<std::string> build_params =
- {
+ std::vector<std::string> build_params = {
"elementwise_binary",
- "op", to_string(_attributes.operation()),
- "dt", lower_string(string_from_data_type(_dst->data_type())),
- "dst_dim0", support::cpp11::to_string(_dst->dimension(0)),
- "dst_dim1", support::cpp11::to_string(_dst->dimension(1)),
+ "op",
+ to_string(_attributes.operation()),
+ "dt",
+ lower_string(string_from_data_type(_dst->data_type())),
+ "dst_dim0",
+ support::cpp11::to_string(_dst->dimension(0)),
+ "dst_dim1",
+ support::cpp11::to_string(_dst->dimension(1)),
};
return join(build_params, "_");
}
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h
index e9c41530f8..1a20d4c533 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.h
@@ -46,17 +46,17 @@ public:
* @param[in] tensors Tensor arguments to the component
* @param[in] attributes Component attributes
*/
- GpuCkwElementwiseBinary(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes);
+ GpuCkwElementwiseBinary(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes);
ARM_COMPUTE_DISALLOW_COPY_ALLOW_MOVE(GpuCkwElementwiseBinary);
/** Destructor */
~GpuCkwElementwiseBinary() override = default;
// Inherited methods overriden:
- virtual void write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const override;
- Window get_window() const override;
- std::string get_name(const ComponentGroup &comp_group) const override;
- std::string get_tuner_id(const ComponentGroup &comp_group) const override;
+ virtual void write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const override;
+ Window get_window() const override;
+ std::string get_name(const ComponentGroup &comp_group) const override;
+ std::string get_tuner_id(const ComponentGroup &comp_group) const override;
private:
const ITensorInfo *_lhs;
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.cpp
index 9c9a298132..8ab3ec3a55 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.cpp
@@ -24,17 +24,18 @@
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.h"
#include "arm_compute/core/Error.h"
-#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
+#include "arm_compute/core/Validate.h"
#include "ckw/TensorTileSampler.h"
+
#include "src/core/helpers/WindowHelpers.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
using namespace ckw;
@@ -48,11 +49,7 @@ GpuCkwPool2d::GpuCkwPool2d(ComponentId id,
const ArgumentPack<ITensorInfo> &tensors,
const Attributes &attributes,
const Settings &settings)
- : IGpuCkwComponentDriver{ id, tensors },
- _src{},
- _dst{},
- _attributes{ attributes },
- _settings{ settings }
+ : IGpuCkwComponentDriver{id, tensors}, _src{}, _dst{}, _attributes{attributes}, _settings{settings}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
@@ -60,14 +57,18 @@ GpuCkwPool2d::GpuCkwPool2d(ComponentId id,
ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
}
-void GpuCkwPool2d::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
+void GpuCkwPool2d::write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const
{
const auto root_window = comp_group.get_root_component()->ckw_component_driver()->get_window();
const unsigned int n0 = root_window.x().step();
const unsigned int m0 = root_window.y().step();
- GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
- GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
+ GpuCkwComponentArgument *src =
+ vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
+ GpuCkwComponentArgument *dst =
+ vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
TileOperand &gid_0 = writer->declare_tile("gid_0", ckw::DataType::Int32);
TileOperand &gid_1 = writer->declare_tile("gid_1", ckw::DataType::Int32);
@@ -90,23 +91,26 @@ void GpuCkwPool2d::write_component_code(const ComponentGroup &comp_group, GpuCkw
const auto src_data_type = _src->data_type();
// Check if this is global pooling path
- const bool is_global_pooling = (pool_size_x == src_width) && (pool_size_y == src_height) && (pad_x == 0) && (pad_y == 0);
+ const bool is_global_pooling =
+ (pool_size_x == src_width) && (pool_size_y == src_height) && (pad_x == 0) && (pad_y == 0);
// Check if this a case of FP_MIXED_PRECISION
- const bool use_fp_mixed_precision = (src_data_type == DataType::F16) && _settings.mixed_precision() && _attributes.pool_type() != PoolingType::MAX;
- const auto acc_data_type = (use_fp_mixed_precision) ? (DataType::F32) : (src_data_type);
+ const bool use_fp_mixed_precision =
+ (src_data_type == DataType::F16) && _settings.mixed_precision() && _attributes.pool_type() != PoolingType::MAX;
+ const auto acc_data_type = (use_fp_mixed_precision) ? (DataType::F32) : (src_data_type);
TileOperand &const_0 = writer->declare_tile("0", 0);
const TileOperand &const_1 = writer->declare_tile("1", 1);
const TileOperand &const_lowest_value = writer->declare_tile("LOWEST_VALUE", std::numeric_limits<float>::lowest());
const TileOperand &pool_size_x_tile = writer->declare_tile("POOL_SIZE_X", pool_size_x);
const TileOperand &pool_size_y_tile = writer->declare_tile("POOL_SIZE_Y", pool_size_y);
- const TileOperand &stride_x_tile = writer->declare_tile("STRIDE_X", static_cast<int32_t>(_attributes.stride().x()));
- const TileOperand &stride_y_tile = writer->declare_tile("STRIDE_Y", static_cast<int32_t>(_attributes.stride().y()));
- const TileOperand &pad_x_tile = writer->declare_tile("PAD_X", pad_x);
- const TileOperand &pad_y_tile = writer->declare_tile("PAD_Y", pad_y);
- const TileOperand &dst_height_tile = writer->declare_tile("DST_HEIGHT", static_cast<int32_t>(_dst->dimension(height_idx)));
- const TileOperand &src_height_tile = writer->declare_tile("SRC_HEIGHT", src_height);
- const TileOperand &src_width_tile = writer->declare_tile("SRC_WIDTH", src_width);
+ const TileOperand &stride_x_tile = writer->declare_tile("STRIDE_X", static_cast<int32_t>(_attributes.stride().x()));
+ const TileOperand &stride_y_tile = writer->declare_tile("STRIDE_Y", static_cast<int32_t>(_attributes.stride().y()));
+ const TileOperand &pad_x_tile = writer->declare_tile("PAD_X", pad_x);
+ const TileOperand &pad_y_tile = writer->declare_tile("PAD_Y", pad_y);
+ const TileOperand &dst_height_tile =
+ writer->declare_tile("DST_HEIGHT", static_cast<int32_t>(_dst->dimension(height_idx)));
+ const TileOperand &src_height_tile = writer->declare_tile("SRC_HEIGHT", src_height);
+ const TileOperand &src_width_tile = writer->declare_tile("SRC_WIDTH", src_width);
TileOperand &idx_out_n = writer->declare_tile("idx_out_n", ckw::DataType::Int32);
TileOperand &idx_out_h = writer->declare_tile("idx_out_h", ckw::DataType::Int32);
@@ -145,7 +149,7 @@ void GpuCkwPool2d::write_component_code(const ComponentGroup &comp_group, GpuCkw
// Prepare dst tensor and tile
TileInfo dst_tile_info = TileInfo(to_ckw(src_data_type), m0, n0);
- if(!dst->has_tile())
+ if (!dst->has_tile())
{
TileOperand &dst_tile = writer->declare_tile("dst_tile", dst_tile_info);
dst->init_virtual_tensor(dst_tile, dst_sampler);
@@ -156,14 +160,15 @@ void GpuCkwPool2d::write_component_code(const ComponentGroup &comp_group, GpuCkw
const TileOperand &res_tile = writer->declare_tile("res_tile", TileInfo(to_ckw(acc_data_type), m0, n0));
// Initialise result tile with appropriate value
- if(_attributes.pool_type() == PoolingType::MAX)
+ if (_attributes.pool_type() == PoolingType::MAX)
{
- if(_settings.use_inf_as_limit())
+ if (_settings.use_inf_as_limit())
{
TileContainer minus_inf_tile_container;
std::vector<std::string> value = std::vector<std::string>(n0, "(-INFINITY)");
- minus_inf_tile_container.push_back({ value });
- const TileOperand &minus_inf = writer->declare_tile("minus_inf_const", minus_inf_tile_container, to_ckw(acc_data_type));
+ minus_inf_tile_container.push_back({value});
+ const TileOperand &minus_inf =
+ writer->declare_tile("minus_inf_const", minus_inf_tile_container, to_ckw(acc_data_type));
writer->op_assign(res_tile, minus_inf);
}
else
@@ -209,7 +214,7 @@ void GpuCkwPool2d::write_component_code(const ComponentGroup &comp_group, GpuCkw
writer->op_binary_elementwise_function(pool_y_e, BinaryFunction::Min, pool_size_y_tile, pool_y_e);
const TileOperand &filter_size = writer->declare_tile("filter_size", ckw::DataType::Int32);
- if(_attributes.exclude_padding())
+ if (_attributes.exclude_padding())
{
const TileOperand &y_diff = writer->declare_tile("y_diff", ckw::DataType::Int32);
const TileOperand &x_diff = writer->declare_tile("x_diff", ckw::DataType::Int32);
@@ -227,7 +232,7 @@ void GpuCkwPool2d::write_component_code(const ComponentGroup &comp_group, GpuCkw
const TileOperand &x = writer->declare_tile("x", ckw::DataType::Int32);
const TileOperand &y = writer->declare_tile("y", ckw::DataType::Int32);
- if(is_global_pooling)
+ if (is_global_pooling)
{
writer->op_assign(x, const_0);
writer->op_assign(y, const_0);
@@ -242,76 +247,80 @@ void GpuCkwPool2d::write_component_code(const ComponentGroup &comp_group, GpuCkw
}
// Y dim for-loop
- writer->op_for_loop(y, BinaryOp::Less, pool_y_e, y, AssignmentOp::Increment, const_1, [&]()
- {
- // Reset the iterator for the inner loop
- if(is_global_pooling)
- {
- writer->op_assign(x, const_0);
- }
- else
+ writer->op_for_loop(
+ y, BinaryOp::Less, pool_y_e, y, AssignmentOp::Increment, const_1,
+ [&]()
{
- writer->op_assign(x, pool_x_s);
- }
-
- TileOperand &a_y = writer->declare_tile("a_y", ckw::DataType::Int32);
- writer->op_binary_expression(a_y, idx_in_h, BinaryOp::Add, y);
-
- // X dim for-loop
- writer->op_for_loop(x, BinaryOp::Less, pool_x_e, x, AssignmentOp::Increment, const_1, [&]()
- {
- TileOperand &a_x = writer->declare_tile("a_x", ckw::DataType::Int32);
- writer->op_binary_expression(a_x, idx_in_w, BinaryOp::Add, x);
-
- TileOperand &src_tile = writer->declare_tile("src_tile", TileInfo(to_ckw(acc_data_type), m0, n0));
-
- src_sampler.y(a_x);
- src_sampler.z(a_y);
-
- // Load src tile
- if(use_fp_mixed_precision)
+ // Reset the iterator for the inner loop
+ if (is_global_pooling)
{
- TileOperand &src_uncasted_tile = writer->declare_tile("uncasted_src_tile", dst_tile_info);
- writer->op_load(src_uncasted_tile, src->tensor(), src_sampler);
- writer->op_cast_expression(src_tile, src_uncasted_tile, ckw::ConvertPolicy::None);
+ writer->op_assign(x, const_0);
}
else
{
- writer->op_load(src_tile, src->tensor(), src_sampler);
+ writer->op_assign(x, pool_x_s);
}
- // Take the square of the input, for L2 Pooling
- if(_attributes.pool_type() == PoolingType::L2)
- {
- writer->op_binary_expression(src_tile, src_tile, BinaryOp::Mul, src_tile);
- }
-
- // Perfom Pooling op
- if(_attributes.pool_type() == PoolingType::MAX)
- {
- writer->op_binary_elementwise_function(res_tile, BinaryFunction::Max, res_tile, src_tile);
- }
- else
- {
- writer->op_binary_expression(res_tile, res_tile, BinaryOp::Add, src_tile);
- }
+ TileOperand &a_y = writer->declare_tile("a_y", ckw::DataType::Int32);
+ writer->op_binary_expression(a_y, idx_in_h, BinaryOp::Add, y);
+
+ // X dim for-loop
+ writer->op_for_loop(
+ x, BinaryOp::Less, pool_x_e, x, AssignmentOp::Increment, const_1,
+ [&]()
+ {
+ TileOperand &a_x = writer->declare_tile("a_x", ckw::DataType::Int32);
+ writer->op_binary_expression(a_x, idx_in_w, BinaryOp::Add, x);
+
+ TileOperand &src_tile = writer->declare_tile("src_tile", TileInfo(to_ckw(acc_data_type), m0, n0));
+
+ src_sampler.y(a_x);
+ src_sampler.z(a_y);
+
+ // Load src tile
+ if (use_fp_mixed_precision)
+ {
+ TileOperand &src_uncasted_tile = writer->declare_tile("uncasted_src_tile", dst_tile_info);
+ writer->op_load(src_uncasted_tile, src->tensor(), src_sampler);
+ writer->op_cast_expression(src_tile, src_uncasted_tile, ckw::ConvertPolicy::None);
+ }
+ else
+ {
+ writer->op_load(src_tile, src->tensor(), src_sampler);
+ }
+
+ // Take the square of the input, for L2 Pooling
+ if (_attributes.pool_type() == PoolingType::L2)
+ {
+ writer->op_binary_expression(src_tile, src_tile, BinaryOp::Mul, src_tile);
+ }
+
+ // Perfom Pooling op
+ if (_attributes.pool_type() == PoolingType::MAX)
+ {
+ writer->op_binary_elementwise_function(res_tile, BinaryFunction::Max, res_tile, src_tile);
+ }
+ else
+ {
+ writer->op_binary_expression(res_tile, res_tile, BinaryOp::Add, src_tile);
+ }
+ });
});
- });
- if((_attributes.pool_type() == PoolingType::AVG) || (_attributes.pool_type() == PoolingType::L2))
+ if ((_attributes.pool_type() == PoolingType::AVG) || (_attributes.pool_type() == PoolingType::L2))
{
// filter_size is automatically broadcasted in the operation
writer->op_binary_expression(res_tile, res_tile, BinaryOp::Div, filter_size);
}
// Take square root of the result in L2 pooling
- if(_attributes.pool_type() == PoolingType::L2)
+ if (_attributes.pool_type() == PoolingType::L2)
{
writer->op_unary_elementwise_function(res_tile, UnaryFunction::Sqrt, res_tile);
}
// Store the results and do casting if FP_MIXED_PRECISION
- if(use_fp_mixed_precision)
+ if (use_fp_mixed_precision)
{
writer->op_cast_expression(dst_tile, res_tile, ckw::ConvertPolicy::None);
}
@@ -326,7 +335,7 @@ Window GpuCkwPool2d::get_window() const
ARM_COMPUTE_ERROR_ON_MSG(_dst->tensor_shape().total_size() == 0U, "Destination tensor is not initialized");
TensorShape output_shape = _dst->tensor_shape();
- const unsigned int vec_size = adjust_vec_size(((_dst->data_type() == DataType::F32) ? 2 : 4), _dst->dimension(0));
+ const unsigned int vec_size = adjust_vec_size(((_dst->data_type() == DataType::F32) ? 2 : 4), _dst->dimension(0));
// Create and configure kernel window
auto win = calculate_max_window(output_shape, Steps(vec_size));
win = win.collapse_if_possible(win, Window::DimZ); // collapse window on batch size.
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.h
index 2ccf255236..822282a108 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwPool2d.h
@@ -59,9 +59,11 @@ public:
/** Destructor */
~GpuCkwPool2d() override = default;
// Inherited methods overriden:
- virtual void write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const override;
- Window get_window() const override;
- std::string get_name(const ComponentGroup &comp_group) const override;
+ virtual void write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const override;
+ Window get_window() const override;
+ std::string get_name(const ComponentGroup &comp_group) const override;
private:
const ITensorInfo *_src;
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwResize.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwResize.cpp
index d997c82dae..f2a7d41afd 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwResize.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwResize.cpp
@@ -28,14 +28,13 @@
#include "src/core/helpers/WindowHelpers.h"
#include "src/core/utils/ScaleUtils.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h"
-
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "support/StringSupport.h"
namespace arm_compute
@@ -49,20 +48,17 @@ namespace
constexpr unsigned int opencl_vector_size_in_bytes = 16;
} // namespace
-GpuCkwResize::GpuCkwResize(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
- : IGpuCkwComponentDriver{ id, tensors },
- _src{},
- _dst{},
- _attributes{ attributes }
+GpuCkwResize::GpuCkwResize(ComponentId id, const ArgumentPack<ITensorInfo> &tensors, const Attributes &attributes)
+ : IGpuCkwComponentDriver{id, tensors}, _src{}, _dst{}, _attributes{attributes}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC);
_dst = this->tensors().get_const_tensor(TensorType::ACL_DST);
ARM_COMPUTE_ERROR_ON_NULLPTR(_src, _dst);
}
-void GpuCkwResize::do_nearest_neighbor_resize(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
+void GpuCkwResize::do_nearest_neighbor_resize(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const
{
const size_t width_idx = get_data_layout_dimension_index(_dst->data_layout(), DataLayoutDimension::WIDTH);
const size_t height_idx = get_data_layout_dimension_index(_dst->data_layout(), DataLayoutDimension::HEIGHT);
@@ -72,12 +68,16 @@ void GpuCkwResize::do_nearest_neighbor_resize(const ComponentGroup &comp_group,
const int32_t m0 = root_window.y().step();
const int32_t partial_n0 = _dst->dimension(0) % n0;
- GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
- GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
+ GpuCkwComponentArgument *src =
+ vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
+ GpuCkwComponentArgument *dst =
+ vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
// Constants
- const float scale_x = scale_utils::calculate_resize_ratio(_src->dimension(width_idx), _dst->dimension(width_idx), _attributes.align_corners());
- const float scale_y = scale_utils::calculate_resize_ratio(_src->dimension(height_idx), _dst->dimension(height_idx), _attributes.align_corners());
+ const float scale_x = scale_utils::calculate_resize_ratio(_src->dimension(width_idx), _dst->dimension(width_idx),
+ _attributes.align_corners());
+ const float scale_y = scale_utils::calculate_resize_ratio(_src->dimension(height_idx), _dst->dimension(height_idx),
+ _attributes.align_corners());
const auto &tile_scale_x = writer->declare_tile("scale_x", scale_x);
const auto &tile_scale_y = writer->declare_tile("scale_y", scale_y);
const auto &tile_0 = writer->declare_tile("0", 0);
@@ -112,7 +112,7 @@ void GpuCkwResize::do_nearest_neighbor_resize(const ComponentGroup &comp_group,
const auto &tile_xi_f = writer->declare_tile("xi_f", ckw::DataType::Fp32);
const auto &tile_yi_f = writer->declare_tile("yi_f", ckw::DataType::Fp32);
- switch(_attributes.sampling_policy())
+ switch (_attributes.sampling_policy())
{
case SamplingPolicy::TOP_LEFT:
// xi_f = (xo * scale_x)
@@ -138,7 +138,7 @@ void GpuCkwResize::do_nearest_neighbor_resize(const ComponentGroup &comp_group,
ARM_COMPUTE_ERROR("Unsupported sampling policy");
}
- if(_attributes.align_corners())
+ if (_attributes.align_corners())
{
writer->op_unary_elementwise_function(tile_xi_f, UnaryFunction::Round, tile_xi_f);
writer->op_unary_elementwise_function(tile_yi_f, UnaryFunction::Round, tile_yi_f);
@@ -161,8 +161,10 @@ void GpuCkwResize::do_nearest_neighbor_resize(const ComponentGroup &comp_group,
auto &tile_xi0 = writer->declare_tile("xi0", ckw::DataType::Int32);
auto &tile_yi0 = writer->declare_tile("yi0", ckw::DataType::Int32);
- writer->op_ternary_elementwise_function(tile_xi0, TernaryFunction::Clamp, tile_xi_f_int, tile_0, tile_src_w_minus_1);
- writer->op_ternary_elementwise_function(tile_yi0, TernaryFunction::Clamp, tile_yi_f_int, tile_0, tile_src_h_minus_1);
+ writer->op_ternary_elementwise_function(tile_xi0, TernaryFunction::Clamp, tile_xi_f_int, tile_0,
+ tile_src_w_minus_1);
+ writer->op_ternary_elementwise_function(tile_yi0, TernaryFunction::Clamp, tile_yi_f_int, tile_0,
+ tile_src_h_minus_1);
TensorTileSampler src_sampler;
src_sampler.x(tile_co);
@@ -199,7 +201,9 @@ void GpuCkwResize::do_nearest_neighbor_resize(const ComponentGroup &comp_group,
writer->op_assign(tile_dst, tile_src);
}
-void GpuCkwResize::do_bilinear_resize(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
+void GpuCkwResize::do_bilinear_resize(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const
{
const size_t width_idx = get_data_layout_dimension_index(_dst->data_layout(), DataLayoutDimension::WIDTH);
const size_t height_idx = get_data_layout_dimension_index(_dst->data_layout(), DataLayoutDimension::HEIGHT);
@@ -209,12 +213,16 @@ void GpuCkwResize::do_bilinear_resize(const ComponentGroup &comp_group, GpuCkwVa
const int32_t m0 = root_window.y().step();
const int32_t partial_n0 = _dst->dimension(0) % n0;
- GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
- GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
+ GpuCkwComponentArgument *src =
+ vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
+ GpuCkwComponentArgument *dst =
+ vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
// Constants
- const float scale_x = scale_utils::calculate_resize_ratio(_src->dimension(width_idx), _dst->dimension(width_idx), _attributes.align_corners());
- const float scale_y = scale_utils::calculate_resize_ratio(_src->dimension(height_idx), _dst->dimension(height_idx), _attributes.align_corners());
+ const float scale_x = scale_utils::calculate_resize_ratio(_src->dimension(width_idx), _dst->dimension(width_idx),
+ _attributes.align_corners());
+ const float scale_y = scale_utils::calculate_resize_ratio(_src->dimension(height_idx), _dst->dimension(height_idx),
+ _attributes.align_corners());
const auto &tile_scale_x = writer->declare_tile("scale_x", scale_x);
const auto &tile_scale_y = writer->declare_tile("scale_y", scale_y);
const auto &tile_0 = writer->declare_tile("0", 0);
@@ -251,7 +259,7 @@ void GpuCkwResize::do_bilinear_resize(const ComponentGroup &comp_group, GpuCkwVa
const auto &tile_xi_f = writer->declare_tile("xi_f", ckw::DataType::Fp32);
const auto &tile_yi_f = writer->declare_tile("yi_f", ckw::DataType::Fp32);
- switch(_attributes.sampling_policy())
+ switch (_attributes.sampling_policy())
{
case SamplingPolicy::TOP_LEFT:
// xi_f = (xo * scale_x)
@@ -312,8 +320,10 @@ void GpuCkwResize::do_bilinear_resize(const ComponentGroup &comp_group, GpuCkwVa
writer->op_ternary_elementwise_function(tile_xi0, TernaryFunction::Clamp, tile_xi, tile_0, tile_src_w_minus_1);
writer->op_ternary_elementwise_function(tile_yi0, TernaryFunction::Clamp, tile_yi, tile_0, tile_src_h_minus_1);
- writer->op_ternary_elementwise_function(tile_xi1, TernaryFunction::Clamp, tile_xi_plus_1, tile_0, tile_src_w_minus_1);
- writer->op_ternary_elementwise_function(tile_yi1, TernaryFunction::Clamp, tile_yi_plus_1, tile_0, tile_src_h_minus_1);
+ writer->op_ternary_elementwise_function(tile_xi1, TernaryFunction::Clamp, tile_xi_plus_1, tile_0,
+ tile_src_w_minus_1);
+ writer->op_ternary_elementwise_function(tile_yi1, TernaryFunction::Clamp, tile_yi_plus_1, tile_0,
+ tile_src_h_minus_1);
TensorTileSampler in_sampler;
in_sampler.x(tile_co);
@@ -388,7 +398,7 @@ void GpuCkwResize::do_bilinear_resize(const ComponentGroup &comp_group, GpuCkwVa
writer->op_binary_expression(tile_a1, tile_yi_f, BinaryOp::Sub, tile_yi_float);
writer->op_binary_expression(tile_b1, tile_1, BinaryOp::Sub, tile_a1);
- if(is_data_type_float(_src->data_type()))
+ if (is_data_type_float(_src->data_type()))
{
// Cast weights to source type
const auto &tile_a_src_type = writer->declare_tile("a_src_t", to_ckw(_src->data_type()));
@@ -461,9 +471,11 @@ void GpuCkwResize::do_bilinear_resize(const ComponentGroup &comp_group, GpuCkwVa
}
}
-void GpuCkwResize::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
+void GpuCkwResize::write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const
{
- switch(_attributes.interpolation_policy())
+ switch (_attributes.interpolation_policy())
{
case InterpolationPolicy::NEAREST_NEIGHBOR:
do_nearest_neighbor_resize(comp_group, vtable, writer);
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp
index 8917391537..889706b0c0 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp
@@ -24,10 +24,12 @@
#include "GpuCkwStore.h"
#include "arm_compute/core/Error.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+
#include <string>
namespace arm_compute
@@ -37,12 +39,14 @@ namespace experimental
namespace dynamic_fusion
{
GpuCkwStore::GpuCkwStore(ComponentId id, const ArgumentPack<ITensorInfo> &tensors)
- : IGpuCkwComponentDriver{ id, tensors }, _src{}, _dst{}
+ : IGpuCkwComponentDriver{id, tensors}, _src{}, _dst{}
{
_src = this->tensors().get_const_tensor(TensorType::ACL_SRC_0);
_dst = this->tensors().get_const_tensor(TensorType::ACL_DST_0);
}
-void GpuCkwStore::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
+void GpuCkwStore::write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const
{
auto src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
auto dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h
index 8e35651caf..f1f0e6747b 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.h
@@ -48,8 +48,10 @@ public:
/** Destructor */
~GpuCkwStore() override = default;
// Inherited methods overriden:
- virtual void write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const override;
- std::string get_name(const ComponentGroup &comp_group) const override;
+ virtual void write_component_code(const ComponentGroup &comp_group,
+ GpuCkwVariableTable &vtable,
+ GpuCkwScopedKernelWriter writer) const override;
+ std::string get_name(const ComponentGroup &comp_group) const override;
private:
const ITensorInfo *_src;
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h
index e2b8584b99..6ba2b2f651 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h
@@ -26,6 +26,7 @@
#include "arm_compute/core/utils/misc/Utility.h"
#include "ckw/TensorTileSampler.h"
+
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
@@ -44,9 +45,14 @@ using SamplerCreator = std::function<TensorTileSampler(GpuCkwScopedKernelWriter
/** Load src and dst tiles of dimension [m0, n0] only when not loaded and prepare the sampler
*/
-inline void load_src_dst_tiles_and_prepare_sampler(GpuCkwScopedKernelWriter &writer, GpuCkwComponentArgument *src, GpuCkwComponentArgument *dst, int32_t m0, int32_t n0, SamplerCreator create_sampler)
+inline void load_src_dst_tiles_and_prepare_sampler(GpuCkwScopedKernelWriter &writer,
+ GpuCkwComponentArgument *src,
+ GpuCkwComponentArgument *dst,
+ int32_t m0,
+ int32_t n0,
+ SamplerCreator create_sampler)
{
- if(!src->has_tile())
+ if (!src->has_tile())
{
const auto sampler = create_sampler(writer, m0, n0);
writer->op_load_once(src, sampler);
@@ -61,7 +67,7 @@ inline void load_src_dst_tiles_and_prepare_sampler(GpuCkwScopedKernelWriter &wri
const auto &sampler = src->tile_sampler();
// Prepare the output tile.
- if(!dst->has_tile())
+ if (!dst->has_tile())
{
auto &tile = writer->declare_tile("dst_tile", src_tile.tile_info());
dst->init_virtual_tensor(tile, sampler);
@@ -78,7 +84,13 @@ inline void load_src_dst_tiles_and_prepare_sampler(GpuCkwScopedKernelWriter &wri
* @param[in] prefix Prefix to all the tiles declared within this function
* @param[in] const_0 Constant tile of value 0
*/
-inline void get_coord(GpuCkwScopedKernelWriter writer, TileOperand &coord, const TileOperand &gid, int32_t step_v, int32_t leftover_step_v, const std::string &prefix, const TileOperand &const_0)
+inline void get_coord(GpuCkwScopedKernelWriter writer,
+ TileOperand &coord,
+ const TileOperand &gid,
+ int32_t step_v,
+ int32_t leftover_step_v,
+ const std::string &prefix,
+ const TileOperand &const_0)
{
auto &step = writer->declare_tile(prefix + "step", step_v);
auto &leftover_step = writer->declare_tile(prefix + "leftover_step", leftover_step_v);
@@ -122,8 +134,15 @@ inline void get_coord(GpuCkwScopedKernelWriter writer, TileOperand &coord, const
*
* @return TensorTileSampler
*/
-inline TensorTileSampler create_boundary_aware_2d_sampler(GpuCkwScopedKernelWriter writer, TileOperand &gid_0, TileOperand &gid_1, int32_t dim0_v, int32_t dim1_v, int32_t n0_v, int32_t m0_v,
- const std::string prefix, TileOperand &const_0)
+inline TensorTileSampler create_boundary_aware_2d_sampler(GpuCkwScopedKernelWriter writer,
+ TileOperand &gid_0,
+ TileOperand &gid_1,
+ int32_t dim0_v,
+ int32_t dim1_v,
+ int32_t n0_v,
+ int32_t m0_v,
+ const std::string prefix,
+ TileOperand &const_0)
{
// Clamp tile size [n0, m0] against dimension [dim0, dim1]
// This is needed to:
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h
index 34b1283add..5da317bf38 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/Common.h
@@ -28,6 +28,7 @@
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "ckw/TensorInfo.h"
+
#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
namespace arm_compute
@@ -38,7 +39,7 @@ namespace dynamic_fusion
{
inline ckw::DataType to_ckw(DataType dt)
{
- switch(dt)
+ switch (dt)
{
case DataType::F32:
return ckw::DataType::Fp32;
@@ -65,21 +66,16 @@ inline ckw::DataType to_ckw(DataType dt)
inline ckw::TensorShape to_ckw(const TensorShape &shape)
{
- ARM_COMPUTE_ERROR_ON(shape.num_max_dimensions < std::tuple_size<ckw::TensorShape> {});
- ARM_COMPUTE_ERROR_ON(std::tuple_size<ckw::TensorShape> {} != 5);
+ ARM_COMPUTE_ERROR_ON(shape.num_max_dimensions < std::tuple_size<ckw::TensorShape>{});
+ ARM_COMPUTE_ERROR_ON(std::tuple_size<ckw::TensorShape>{} != 5);
/// NOTE: Overflow danger. Use size_t?
- return ckw::TensorShape
- {
- static_cast<int32_t>(shape[0]),
- static_cast<int32_t>(shape[1]),
- static_cast<int32_t>(shape[2]),
- static_cast<int32_t>(shape[3]),
- static_cast<int32_t>(shape[4])
- };
+ return ckw::TensorShape{static_cast<int32_t>(shape[0]), static_cast<int32_t>(shape[1]),
+ static_cast<int32_t>(shape[2]), static_cast<int32_t>(shape[3]),
+ static_cast<int32_t>(shape[4])};
}
inline ckw::TensorDataLayout to_ckw(DataLayout dl)
{
- switch(dl)
+ switch (dl)
{
case DataLayout::NHWC:
return ckw::TensorDataLayout::Nhwc;
@@ -91,18 +87,13 @@ inline ckw::TensorDataLayout to_ckw(DataLayout dl)
}
inline ckw::TensorInfo to_ckw(const ITensorInfo &tensor_info)
{
- return ckw::TensorInfo
- {
- to_ckw(tensor_info.data_type()),
- to_ckw(tensor_info.tensor_shape()),
- to_ckw(tensor_info.data_layout()),
- tensor_info.id()
- };
+ return ckw::TensorInfo{to_ckw(tensor_info.data_type()), to_ckw(tensor_info.tensor_shape()),
+ to_ckw(tensor_info.data_layout()), tensor_info.id()};
}
inline TensorComponentType from_ckw(const ckw::TensorComponentType &component)
{
- switch(component)
+ switch (component)
{
case ckw::TensorComponentType::OffsetFirstElement:
return TensorComponentType::OffsetFirstElement;
@@ -142,7 +133,7 @@ inline TensorComponentType from_ckw(const ckw::TensorComponentType &component)
inline ckw::TensorStorageType to_ckw(const TensorStorageType &storage)
{
- switch(storage)
+ switch (storage)
{
case TensorStorageType::ClBufferUint8Ptr:
return ckw::TensorStorageType::BufferUint8Ptr;
@@ -159,7 +150,7 @@ inline ckw::TensorStorageType to_ckw(const TensorStorageType &storage)
}
inline TensorStorageType from_ckw(const ckw::TensorStorageType &storage)
{
- switch(storage)
+ switch (storage)
{
case ckw::TensorStorageType::BufferUint8Ptr:
return TensorStorageType::ClBufferUint8Ptr;
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/ElementwiseBinary.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/ElementwiseBinary.h
index 9cb022fc10..0cba258940 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/ElementwiseBinary.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/type_converter/ElementwiseBinary.h
@@ -25,6 +25,7 @@
#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_COMPONENTS_UTILS_TYPE_CONVERTER_ELEMENTWISEBINARY
#include "ckw/types/Operators.h"
+
#include "src/dynamic_fusion/sketch/gpu/operators/internal/GpuElementwiseBinaryCommon.h"
namespace arm_compute
@@ -35,7 +36,7 @@ namespace dynamic_fusion
{
inline ckw::BinaryOp to_ckw(const ElementwiseBinaryCommonAttributes &attributes)
{
- switch(attributes.operation())
+ switch (attributes.operation())
{
case ElementwiseBinaryCommonAttributes::ElementwiseOp::Add:
return ckw::BinaryOp::Add;