aboutsummaryrefslogtreecommitdiff
path: root/src/dynamic_fusion/sketch/gpu/ckw_driver
diff options
context:
space:
mode:
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu/ckw_driver')
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp45
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h7
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp8
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h7
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp31
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp21
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp33
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp5
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h98
9 files changed, 190 insertions, 65 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp
index d5c03c60c5..d78956f835 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.cpp
@@ -30,6 +30,7 @@
#include "arm_compute/core/Window.h"
#include "src/common/utils/Log.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
@@ -42,29 +43,24 @@ namespace experimental
namespace dynamic_fusion
{
GpuCkwDriver::GpuCkwDriver(const GpuKernelComponentGroup &components) // Constructs the driver from the component group that get_code() later iterates over.
- : _components{ components }
+ : _components{ components }, _kernel{ GpuTargetLanguage::OpenCL } // _kernel is now a member built for the OpenCL target instead of a local created inside get_code()
{
}
std::string GpuCkwDriver::get_name() // Returns the kernel name; get_code() forwards this value to _kernel.name().
{
ARM_COMPUTE_LOG_PARAMS(std::string("[V1] TODO")); // the log entry still tags this function as TODO
- return "todo_get_name";
+ return "unnamed"; // fixed placeholder string; nothing here derives a name from the components
}
std::string GpuCkwDriver::get_code()
{
- ARM_COMPUTE_LOG_PARAMS(std::string("[V1] TODO"));
- ckw::Kernel kernel(get_name().c_str(), GpuTargetLanguage::OpenCL);
- GpuCkwKernelWriter root_writer(kernel);
+ _kernel.name(get_name());
+ GpuCkwKernelWriter root_writer(_kernel);
GpuCkwScopedKernelWriter writer(&root_writer);
GpuCkwVariableTable vtable{};
// Global Kernel Writer Driver code
-
- // The following is just an incomplete example of using the kernel writer
-
- // Iterate over component specific Ckw Driver; generate component code and concatenate them
for(auto &comp : _components)
{
auto ckw_driver = comp->ckw_component_driver();
@@ -96,18 +92,31 @@ Window GpuCkwDriver::get_window() const
return root_comp->ckw_component_driver()->get_window();
}
-std::map<ITensorInfo::Id, GpuKernelArgument> GpuCkwDriver::get_tensors()
+GpuKernelArgumentList GpuCkwDriver::get_kernel_arguments() // Returns one list entry per argument reported by _kernel.arguments(), in iteration order.
{
- ARM_COMPUTE_LOG_PARAMS(std::string("[V1] TODO"));
- // Assemble GpuKernelArguments
- std::map<ITensorInfo::Id, GpuKernelArgument> tensors;
- for(const auto t : _components.get_argument_tensors())
+ GpuKernelArgumentList args{};
+ for(const auto &arg : _kernel.arguments()) // NOTE(review): presumably _kernel only has arguments after get_code() has run the component writers - confirm call order
{
- tensors.emplace(
- t->id(),
- GpuKernelArgument{ *t, { GpuKernelArgumentInfo::Type::Tensor_Special_0 } });
+ switch(arg.type()) // dispatch on the kind of CKW kernel argument
+ {
+ case KernelArgument::Type::TensorStorage:
+ {
+ args.emplace_back(static_cast<ITensorInfo::Id>(arg.id()), from_ckw(arg.tensor_storage_type())); // entry = tensor id + storage type converted from the CKW enum
+ break;
+ }
+ case KernelArgument::Type::TensorComponent:
+ {
+ args.emplace_back(static_cast<ITensorInfo::Id>(arg.id()), from_ckw(arg.tensor_component_type())); // entry = tensor id + component type converted from the CKW enum
+ break;
+ }
+ default:
+ {
+ ARM_COMPUTE_ERROR("Unsupported KernelArgument Type"); // every other argument kind is reported as an error and adds no entry
+ break;
+ }
+ }
}
- return tensors;
+ return args;
}
} // namespace dynamic_fusion
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h
index 2084b72098..c6e03f6e03 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwDriver.h
@@ -28,6 +28,8 @@
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "src/dynamic_fusion/sketch/gpu/IGpuKernelWriter.h"
+#include "ckw/Kernel.h"
+
#include <map>
#include <string>
@@ -66,11 +68,12 @@ public:
std::string get_config_id() override;
/** Generate execution window */
Window get_window() const override;
- /** Get the kernel argument lists of the kernel*/
- std::map<ITensorInfo::Id, GpuKernelArgument> get_tensors() override;
+ /** Get the flat list of arguments of the kernel*/
+ GpuKernelArgumentList get_kernel_arguments() override;
private:
GpuKernelComponentGroup _components{};
+ ckw::Kernel _kernel;
};
} // namespace dynamic_fusion
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp
index 154968775c..6f3eca711d 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.cpp
@@ -23,9 +23,10 @@
*/
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
-#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h"
#include <sstream>
@@ -35,7 +36,8 @@ namespace experimental
{
namespace dynamic_fusion
{
-GpuCkwComponentArgument *GpuCkwVariableTable::declare_variable(const GpuKernelComponentGroup &comp_group, GpuCkwScopedKernelWriter &writer, const ITensorInfo *tensor, const std::string &alias)
+GpuCkwComponentArgument *GpuCkwVariableTable::declare_variable(const GpuKernelComponentGroup &comp_group, GpuCkwScopedKernelWriter &writer, const ITensorInfo *tensor, TensorStorageType storage,
+ const std::string &alias)
{
ARM_COMPUTE_ERROR_ON_MSG(!tensor->has_valid_id(), "Tensor info with valid id expected");
@@ -59,7 +61,7 @@ GpuCkwComponentArgument *GpuCkwVariableTable::declare_variable(const GpuKernelCo
std::stringstream ss;
ss << alias << "_t" << abs(tensor->id());
const auto uniq_name = ss.str();
- GpuCkwComponentArgument var{ writer->declare_tensor_argument(uniq_name.c_str(), to_ckw(*tensor)) };
+ GpuCkwComponentArgument var{ writer->declare_tensor_argument(uniq_name, to_ckw(*tensor), to_ckw(storage)) };
auto &&inserted = _vars.emplace(tensor->id(), var);
return &(inserted.first->second);
}
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h
index 1c9cb083ea..0649dcba9d 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h
@@ -24,8 +24,8 @@
#ifndef ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWVARIABLETABLE
#define ACL_SRC_DYNAMIC_FUSION_SKETCH_GPU_CKW_DRIVER_GPUCKWVARIABLETABLE
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h"
#include "arm_compute/core/ITensorInfo.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwComponentArgument.h"
#include <map>
@@ -37,6 +37,7 @@ namespace dynamic_fusion
{
class GpuKernelComponentGroup;
class GpuCkwScopedKernelWriter;
+enum class TensorStorageType;
/** A table of all the variables used in the kernel.
*
@@ -52,11 +53,13 @@ public:
* @param[in] comp_group Component group the tensor belongs to
* @param[in] writer Compute Kernel Writer
* @param[in] tensor Tensor info with which the new variable is associated
+ * @param[in] storage Tensor storage type associated with the tensor
* @param[in] alias Alias for the variable. Will be used as part of the variable name
*
* @return GpuCkwComponentArgument*
*/
- GpuCkwComponentArgument *declare_variable(const GpuKernelComponentGroup &comp_group, GpuCkwScopedKernelWriter &writer, const ITensorInfo *tensor, const std::string &alias = "unnamed");
+ GpuCkwComponentArgument *declare_variable(const GpuKernelComponentGroup &comp_group, GpuCkwScopedKernelWriter &writer, const ITensorInfo *tensor, TensorStorageType storage,
+ const std::string &alias = "unnamed");
private:
std::map<ITensorInfo::Id, GpuCkwComponentArgument> _vars{};
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp
index 224c176a31..c07fac0e0d 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp
@@ -23,14 +23,15 @@
*/
#include "GpuCkwActivation.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "ckw/TensorTileSampler.h"
#include "src/core/helpers/WindowHelpers.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
#include <string>
@@ -84,8 +85,8 @@ inline TensorTileSampler create_sampler(GpuCkwScopedKernelWriter &writer, int32_
} // namespace
GpuCkwActivation::GpuCkwActivation(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes)
: IGpuCkwComponentDriver{ id, tensors },
_src{},
_dst{},
@@ -102,8 +103,8 @@ void GpuCkwActivation::write_component_code(const ComponentGroup &comp_group, Gp
const unsigned int n0 = root_window.x().step();
const unsigned int m0 = root_window.y().step();
- GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, "src");
- GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst");
+ GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
+ GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
load_src_dst_tiles_and_prepare_sampler(writer, src, dst, m0, n0, create_sampler);
@@ -111,14 +112,14 @@ void GpuCkwActivation::write_component_code(const ComponentGroup &comp_group, Gp
auto &dst_tile = dst->tile();
// Constants
- const auto &constant_minus_1 = writer->declare_tile("minus_1", -1);
- const auto &constant_pos_1 = writer->declare_tile("one", 1);
- const auto &constant_zero = writer->declare_tile("zero", 0);
- const auto &constant_A = writer->declare_tile("A_VAL", _attributes.a());
- const auto &constant_B = writer->declare_tile("B_VAL", _attributes.b());
+ const auto &constant_minus_1 = writer->declare_tile("minus_1", -1);
+ const auto &constant_pos_1 = writer->declare_tile("one", 1);
+ const auto &constant_zero = writer->declare_tile("zero", 0);
+ const auto &constant_A = writer->declare_tile("A_VAL", _attributes.a());
+ const auto &constant_B = writer->declare_tile("B_VAL", _attributes.b());
// Perform the operation.
- switch (_attributes.activation())
+ switch(_attributes.activation())
{
case ActivationLayerInfo::ActivationFunction::LOGISTIC:
{
@@ -178,9 +179,9 @@ Window GpuCkwActivation::get_window() const
// Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged
// This is in line with the collapsing convention used by operators like Conv2d
output_shape.collapse(2U, 1U);
- constexpr unsigned int vector_size_byte_opencl = 16;
- const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
- Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
+ constexpr unsigned int vector_size_byte_opencl = 16;
+ const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
+ Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
return win;
}
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp
index dd71c55df2..8d7e6a8c37 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp
@@ -23,14 +23,15 @@
*/
#include "GpuCkwCast.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "ckw/TensorTileSampler.h"
#include "src/core/helpers/WindowHelpers.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h"
#include <string>
@@ -84,8 +85,8 @@ inline TensorTileSampler create_sampler(GpuCkwScopedKernelWriter &writer, int32_
} // namespace
GpuCkwCast::GpuCkwCast(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes)
: IGpuCkwComponentDriver{ id, tensors },
_src{},
_dst{},
@@ -102,8 +103,8 @@ void GpuCkwCast::write_component_code(const ComponentGroup &comp_group, GpuCkwVa
const unsigned int n0 = root_window.x().step();
const unsigned int m0 = root_window.y().step();
- GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, "src");
- GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst");
+ GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
+ GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
// Load the source tile and prepare the sampler.
if(!src->has_tile())
@@ -124,7 +125,7 @@ void GpuCkwCast::write_component_code(const ComponentGroup &comp_group, GpuCkwVa
if(!dst->has_tile())
{
// Get Target datatype and convert it to ckw::DataType.
- ckw::DataType target_dt = dynamic_fusion::to_ckw(_attributes.data_type());
+ ckw::DataType target_dt = dynamic_fusion::to_ckw(_attributes.data_type());
// Create dst_tile based on src_tile dimensions and with target DataType.
const TileInfo src_tile_info = src_tile.tile_info();
@@ -166,9 +167,9 @@ Window GpuCkwCast::get_window() const
// Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged
// This is in line with the collapsing convention used by operators like Conv2d
output_shape.collapse(2U, 1U);
- constexpr unsigned int vector_size_byte_opencl = 16;
- const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
- Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
+ constexpr unsigned int vector_size_byte_opencl = 16;
+ const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
+ Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
return win;
}
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp
index 685bf391dc..15e32e26d5 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp
@@ -23,14 +23,16 @@
*/
#include "GpuCkwElementwiseBinary.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "ckw/TensorTileSampler.h"
#include "ckw/types/TensorSamplerTypes.h"
#include "src/core/helpers/WindowHelpers.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
#include <string>
@@ -54,14 +56,20 @@ inline TensorTileSampler create_simple_sampler(GpuCkwScopedKernelWriter &writer,
auto &gid_1 = writer->declare_tile("gid_1", ckw::DataType::Int32);
auto &gid_2 = writer->declare_tile("gid_2", ckw::DataType::Int32);
- auto &const_0 = writer->declare_tile("0", 0);
-
writer->op_get_global_id(gid_0, 0);
writer->op_get_global_id(gid_1, 1);
writer->op_get_global_id(gid_2, 2);
- sampler.x(gid_0);
- sampler.y(gid_1);
+ auto &x_coord = writer->declare_tile("x_coord", ckw::DataType::Int32);
+ auto &y_coord = writer->declare_tile("y_coord", ckw::DataType::Int32);
+ auto &m0_t = writer->declare_tile("m0", m0);
+ auto &n0_t = writer->declare_tile("n0", n0);
+ writer->op_binary_expression(x_coord, gid_0, ckw::BinaryOp::Mul, n0_t);
+ writer->op_binary_expression(y_coord, gid_1, ckw::BinaryOp::Mul, m0_t);
+
+ sampler.x(x_coord);
+ sampler.y(y_coord);
+ auto &const_0 = writer->declare_tile("0", 0);
sampler.z(const_0); // 3rd dimension collapsed with 2nd dimension
sampler.b(gid_2);
@@ -99,9 +107,9 @@ void GpuCkwElementwiseBinary::write_component_code(const ComponentGroup &comp_gr
const unsigned int n0 = root_window.x().step();
const unsigned int m0 = root_window.y().step();
- GpuCkwComponentArgument *lhs = vtable.declare_variable(comp_group, writer, _lhs, "lhs");
- GpuCkwComponentArgument *rhs = vtable.declare_variable(comp_group, writer, _rhs, "rhs");
- GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst");
+ GpuCkwComponentArgument *lhs = vtable.declare_variable(comp_group, writer, _lhs, TensorStorageType::ClBufferUint8Ptr, "lhs");
+ GpuCkwComponentArgument *rhs = vtable.declare_variable(comp_group, writer, _rhs, TensorStorageType::ClBufferUint8Ptr, "rhs");
+ GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
// Load the LHS and RHS tiles and prepare the tensor sampler.
load_lhs_rhs_tiles_and_prepare_sampler(writer, lhs, rhs, m0, n0, create_simple_sampler);
@@ -131,10 +139,9 @@ Window GpuCkwElementwiseBinary::get_window() const
// Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged
// This is in line with the collapsing convention used by operators like Conv2d
output_shape.collapse(2U, 1U);
- // constexpr unsigned int vector_size_byte_opencl = 16;
- // const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
- const unsigned int num_elems_processed_per_iteration = 1U; // Hard-coded for now
- Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
+ constexpr unsigned int vector_size_byte_opencl = 16;
+ const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
+ Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
return win;
}
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp
index 63555e6064..247d1b834f 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp
@@ -24,6 +24,7 @@
#include "GpuCkwStore.h"
#include "arm_compute/core/Error.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
@@ -43,8 +44,8 @@ GpuCkwStore::GpuCkwStore(ComponentId id, const ArgumentPack<ITensorInfo> &tensor
}
void GpuCkwStore::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
{
- auto src = vtable.declare_variable(comp_group, writer, _src, "src");
- auto dst = vtable.declare_variable(comp_group, writer, _dst, "dst");
+ auto src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
+ auto dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
auto &src_tile = src->tile();
const auto &sampler = src->tile_sampler();
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h
index 9027bddd76..8a38d67d80 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h
@@ -28,6 +28,7 @@
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "ckw/TensorInfo.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
namespace arm_compute
{
@@ -98,6 +99,103 @@ inline ckw::TensorInfo to_ckw(const ITensorInfo &tensor_info)
tensor_info.id()
};
}
+
+inline TensorComponentType from_ckw(const ckw::TensorComponentType &component) // Converts a ckw::TensorComponentType into the same-named TensorComponentType enumerator.
+{
+ switch(component)
+ {
+ case ckw::TensorComponentType::OffsetFirstElement:
+ return TensorComponentType::OffsetFirstElement;
+ break; // never reached: the return above already leaves the switch (same for every break below)
+ case ckw::TensorComponentType::Stride0:
+ return TensorComponentType::Stride0;
+ break;
+ case ckw::TensorComponentType::Stride1:
+ return TensorComponentType::Stride1;
+ break;
+ case ckw::TensorComponentType::Stride2:
+ return TensorComponentType::Stride2;
+ break;
+ case ckw::TensorComponentType::Stride3:
+ return TensorComponentType::Stride3;
+ break;
+ case ckw::TensorComponentType::Stride4:
+ return TensorComponentType::Stride4;
+ break;
+ case ckw::TensorComponentType::Dim0:
+ return TensorComponentType::Dim0;
+ break;
+ case ckw::TensorComponentType::Dim1:
+ return TensorComponentType::Dim1;
+ break;
+ case ckw::TensorComponentType::Dim2:
+ return TensorComponentType::Dim2;
+ break;
+ case ckw::TensorComponentType::Dim3:
+ return TensorComponentType::Dim3;
+ break;
+ case ckw::TensorComponentType::Dim4:
+ return TensorComponentType::Dim4;
+ break;
+ case ckw::TensorComponentType::Dim1xDim2:
+ return TensorComponentType::Dim1xDim2;
+ break;
+ case ckw::TensorComponentType::Dim2xDim3:
+ return TensorComponentType::Dim2xDim3;
+ break;
+ case ckw::TensorComponentType::Dim1xDim2xDim3:
+ return TensorComponentType::Dim1xDim2xDim3;
+ break;
+ case ckw::TensorComponentType::Unknown:
+ return TensorComponentType::Unknown; // Unknown maps to Unknown without raising an error
+ default:
+ ARM_COMPUTE_ERROR("Unknown CKW tensor component"); // taken only for values not listed in the cases above
+ return TensorComponentType::Unknown;
+ }
+}
+
+inline ckw::TensorStorageType to_ckw(const TensorStorageType &storage) // Converts a TensorStorageType into the matching ckw::TensorStorageType.
+{
+ switch(storage)
+ {
+ case TensorStorageType::ClBufferUint8Ptr:
+ return ckw::TensorStorageType::BufferUint8Ptr; // "Cl" prefix is dropped on the CKW side
+ break; // never reached: the return above already leaves the switch (same for every break below)
+ case TensorStorageType::ClImage2dReadOnly:
+ return ckw::TensorStorageType::Texture2dReadOnly; // ClImage2d* enumerators pair with the CKW Texture2d* enumerators
+ break;
+ case TensorStorageType::ClImage2dWriteOnly:
+ return ckw::TensorStorageType::Texture2dWriteOnly;
+ break;
+ case TensorStorageType::Unknown:
+ return ckw::TensorStorageType::Unknown; // Unknown maps to Unknown without raising an error
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Unknown tensor storage type"); // taken only for values not listed in the cases above
+ return ckw::TensorStorageType::Unknown;
+ }
+}
+inline TensorStorageType from_ckw(const ckw::TensorStorageType &storage) // Converts a ckw::TensorStorageType back into TensorStorageType; each case is the reverse of the to_ckw overload for storage types.
+{
+ switch(storage)
+ {
+ case ckw::TensorStorageType::BufferUint8Ptr:
+ return TensorStorageType::ClBufferUint8Ptr;
+ break; // never reached: the return above already leaves the switch (same for every break below)
+ case ckw::TensorStorageType::Texture2dReadOnly:
+ return TensorStorageType::ClImage2dReadOnly; // CKW Texture2d* enumerators pair with the ClImage2d* enumerators
+ break;
+ case ckw::TensorStorageType::Texture2dWriteOnly:
+ return TensorStorageType::ClImage2dWriteOnly;
+ break;
+ case ckw::TensorStorageType::Unknown:
+ return TensorStorageType::Unknown; // Unknown maps to Unknown without raising an error
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Unknown CKW tensor storage type"); // taken only for values not listed in the cases above
+ return TensorStorageType::Unknown;
+ }
+}
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute