aboutsummaryrefslogtreecommitdiff
path: root/src/dynamic_fusion/sketch/gpu/ckw_driver/components
diff options
context:
space:
mode:
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu/ckw_driver/components')
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp31
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp21
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp33
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp5
-rw-r--r--src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h98
5 files changed, 148 insertions, 40 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp
index 224c176a31..c07fac0e0d 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp
@@ -23,14 +23,15 @@
*/
#include "GpuCkwActivation.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "ckw/TensorTileSampler.h"
#include "src/core/helpers/WindowHelpers.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
#include <string>
@@ -84,8 +85,8 @@ inline TensorTileSampler create_sampler(GpuCkwScopedKernelWriter &writer, int32_
} // namespace
GpuCkwActivation::GpuCkwActivation(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes)
: IGpuCkwComponentDriver{ id, tensors },
_src{},
_dst{},
@@ -102,8 +103,8 @@ void GpuCkwActivation::write_component_code(const ComponentGroup &comp_group, Gp
const unsigned int n0 = root_window.x().step();
const unsigned int m0 = root_window.y().step();
- GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, "src");
- GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst");
+ GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
+ GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
load_src_dst_tiles_and_prepare_sampler(writer, src, dst, m0, n0, create_sampler);
@@ -111,14 +112,14 @@ void GpuCkwActivation::write_component_code(const ComponentGroup &comp_group, Gp
auto &dst_tile = dst->tile();
// Constants
- const auto &constant_minus_1 = writer->declare_tile("minus_1", -1);
- const auto &constant_pos_1 = writer->declare_tile("one", 1);
- const auto &constant_zero = writer->declare_tile("zero", 0);
- const auto &constant_A = writer->declare_tile("A_VAL", _attributes.a());
- const auto &constant_B = writer->declare_tile("B_VAL", _attributes.b());
+ const auto &constant_minus_1 = writer->declare_tile("minus_1", -1);
+ const auto &constant_pos_1 = writer->declare_tile("one", 1);
+ const auto &constant_zero = writer->declare_tile("zero", 0);
+ const auto &constant_A = writer->declare_tile("A_VAL", _attributes.a());
+ const auto &constant_B = writer->declare_tile("B_VAL", _attributes.b());
// Perform the operation.
- switch (_attributes.activation())
+ switch(_attributes.activation())
{
case ActivationLayerInfo::ActivationFunction::LOGISTIC:
{
@@ -178,9 +179,9 @@ Window GpuCkwActivation::get_window() const
// Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged
// This is in line with the collapsing convention used by operators like Conv2d
output_shape.collapse(2U, 1U);
- constexpr unsigned int vector_size_byte_opencl = 16;
- const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
- Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
+ constexpr unsigned int vector_size_byte_opencl = 16;
+ const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
+ Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
return win;
}
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp
index dd71c55df2..8d7e6a8c37 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp
@@ -23,14 +23,15 @@
*/
#include "GpuCkwCast.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Validate.h"
#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "ckw/TensorTileSampler.h"
#include "src/core/helpers/WindowHelpers.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h"
#include <string>
@@ -84,8 +85,8 @@ inline TensorTileSampler create_sampler(GpuCkwScopedKernelWriter &writer, int32_
} // namespace
GpuCkwCast::GpuCkwCast(ComponentId id,
- const ArgumentPack<ITensorInfo> &tensors,
- const Attributes &attributes)
+ const ArgumentPack<ITensorInfo> &tensors,
+ const Attributes &attributes)
: IGpuCkwComponentDriver{ id, tensors },
_src{},
_dst{},
@@ -102,8 +103,8 @@ void GpuCkwCast::write_component_code(const ComponentGroup &comp_group, GpuCkwVa
const unsigned int n0 = root_window.x().step();
const unsigned int m0 = root_window.y().step();
- GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, "src");
- GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst");
+ GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
+ GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
// Load the source tile and prepare the sampler.
if(!src->has_tile())
@@ -124,7 +125,7 @@ void GpuCkwCast::write_component_code(const ComponentGroup &comp_group, GpuCkwVa
if(!dst->has_tile())
{
// Get Target datatype and convert it to ckw::DataType.
- ckw::DataType target_dt = dynamic_fusion::to_ckw(_attributes.data_type());
+ ckw::DataType target_dt = dynamic_fusion::to_ckw(_attributes.data_type());
// Create dst_tile based on src_tile dimensions and with target DataType.
const TileInfo src_tile_info = src_tile.tile_info();
@@ -166,9 +167,9 @@ Window GpuCkwCast::get_window() const
// Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged
// This is in line with the collapsing convention used by operators like Conv2d
output_shape.collapse(2U, 1U);
- constexpr unsigned int vector_size_byte_opencl = 16;
- const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
- Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
+ constexpr unsigned int vector_size_byte_opencl = 16;
+ const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
+ Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
return win;
}
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp
index 685bf391dc..15e32e26d5 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp
@@ -23,14 +23,16 @@
*/
#include "GpuCkwElementwiseBinary.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
-#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "arm_compute/core/Error.h"
#include "arm_compute/core/Validate.h"
+#include "arm_compute/core/utils/helpers/AdjustVecSize.h"
#include "ckw/TensorTileSampler.h"
#include "ckw/types/TensorSamplerTypes.h"
#include "src/core/helpers/WindowHelpers.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
#include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
+#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h"
#include <string>
@@ -54,14 +56,20 @@ inline TensorTileSampler create_simple_sampler(GpuCkwScopedKernelWriter &writer,
auto &gid_1 = writer->declare_tile("gid_1", ckw::DataType::Int32);
auto &gid_2 = writer->declare_tile("gid_2", ckw::DataType::Int32);
- auto &const_0 = writer->declare_tile("0", 0);
-
writer->op_get_global_id(gid_0, 0);
writer->op_get_global_id(gid_1, 1);
writer->op_get_global_id(gid_2, 2);
- sampler.x(gid_0);
- sampler.y(gid_1);
+ auto &x_coord = writer->declare_tile("x_coord", ckw::DataType::Int32);
+ auto &y_coord = writer->declare_tile("y_coord", ckw::DataType::Int32);
+ auto &m0_t = writer->declare_tile("m0", m0);
+ auto &n0_t = writer->declare_tile("n0", n0);
+ writer->op_binary_expression(x_coord, gid_0, ckw::BinaryOp::Mul, n0_t);
+ writer->op_binary_expression(y_coord, gid_1, ckw::BinaryOp::Mul, m0_t);
+
+ sampler.x(x_coord);
+ sampler.y(y_coord);
+ auto &const_0 = writer->declare_tile("0", 0);
sampler.z(const_0); // 3rd dimension collapsed with 2nd dimension
sampler.b(gid_2);
@@ -99,9 +107,9 @@ void GpuCkwElementwiseBinary::write_component_code(const ComponentGroup &comp_gr
const unsigned int n0 = root_window.x().step();
const unsigned int m0 = root_window.y().step();
- GpuCkwComponentArgument *lhs = vtable.declare_variable(comp_group, writer, _lhs, "lhs");
- GpuCkwComponentArgument *rhs = vtable.declare_variable(comp_group, writer, _rhs, "rhs");
- GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst");
+ GpuCkwComponentArgument *lhs = vtable.declare_variable(comp_group, writer, _lhs, TensorStorageType::ClBufferUint8Ptr, "lhs");
+ GpuCkwComponentArgument *rhs = vtable.declare_variable(comp_group, writer, _rhs, TensorStorageType::ClBufferUint8Ptr, "rhs");
+ GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
// Load the LHS and RHS tiles and prepare the tensor sampler.
load_lhs_rhs_tiles_and_prepare_sampler(writer, lhs, rhs, m0, n0, create_simple_sampler);
@@ -131,10 +139,9 @@ Window GpuCkwElementwiseBinary::get_window() const
// Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged
// This is in line with the collapsing convention used by operators like Conv2d
output_shape.collapse(2U, 1U);
- // constexpr unsigned int vector_size_byte_opencl = 16;
- // const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
- const unsigned int num_elems_processed_per_iteration = 1U; // Hard-coded for now
- Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
+ constexpr unsigned int vector_size_byte_opencl = 16;
+ const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0));
+ Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration));
return win;
}
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp
index 63555e6064..247d1b834f 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp
@@ -24,6 +24,7 @@
#include "GpuCkwStore.h"
#include "arm_compute/core/Error.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h"
#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h"
@@ -43,8 +44,8 @@ GpuCkwStore::GpuCkwStore(ComponentId id, const ArgumentPack<ITensorInfo> &tensor
}
void GpuCkwStore::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const
{
- auto src = vtable.declare_variable(comp_group, writer, _src, "src");
- auto dst = vtable.declare_variable(comp_group, writer, _dst, "dst");
+ auto src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src");
+ auto dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst");
auto &src_tile = src->tile();
const auto &sampler = src->tile_sampler();
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h
index 9027bddd76..8a38d67d80 100644
--- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h
+++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h
@@ -28,6 +28,7 @@
#include "arm_compute/core/TensorShape.h"
#include "arm_compute/core/Types.h"
#include "ckw/TensorInfo.h"
+#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h"
namespace arm_compute
{
@@ -98,6 +99,103 @@ inline ckw::TensorInfo to_ckw(const ITensorInfo &tensor_info)
tensor_info.id()
};
}
+
+inline TensorComponentType from_ckw(const ckw::TensorComponentType &component)
+{
+ switch(component)
+ {
+ case ckw::TensorComponentType::OffsetFirstElement:
+ return TensorComponentType::OffsetFirstElement;
+ break;
+ case ckw::TensorComponentType::Stride0:
+ return TensorComponentType::Stride0;
+ break;
+ case ckw::TensorComponentType::Stride1:
+ return TensorComponentType::Stride1;
+ break;
+ case ckw::TensorComponentType::Stride2:
+ return TensorComponentType::Stride2;
+ break;
+ case ckw::TensorComponentType::Stride3:
+ return TensorComponentType::Stride3;
+ break;
+ case ckw::TensorComponentType::Stride4:
+ return TensorComponentType::Stride4;
+ break;
+ case ckw::TensorComponentType::Dim0:
+ return TensorComponentType::Dim0;
+ break;
+ case ckw::TensorComponentType::Dim1:
+ return TensorComponentType::Dim1;
+ break;
+ case ckw::TensorComponentType::Dim2:
+ return TensorComponentType::Dim2;
+ break;
+ case ckw::TensorComponentType::Dim3:
+ return TensorComponentType::Dim3;
+ break;
+ case ckw::TensorComponentType::Dim4:
+ return TensorComponentType::Dim4;
+ break;
+ case ckw::TensorComponentType::Dim1xDim2:
+ return TensorComponentType::Dim1xDim2;
+ break;
+ case ckw::TensorComponentType::Dim2xDim3:
+ return TensorComponentType::Dim2xDim3;
+ break;
+ case ckw::TensorComponentType::Dim1xDim2xDim3:
+ return TensorComponentType::Dim1xDim2xDim3;
+ break;
+ case ckw::TensorComponentType::Unknown:
+ return TensorComponentType::Unknown;
+ default:
+ ARM_COMPUTE_ERROR("Unknown CKW tensor component");
+ return TensorComponentType::Unknown;
+ }
+}
+
+inline ckw::TensorStorageType to_ckw(const TensorStorageType &storage)
+{
+ switch(storage)
+ {
+ case TensorStorageType::ClBufferUint8Ptr:
+ return ckw::TensorStorageType::BufferUint8Ptr;
+ break;
+ case TensorStorageType::ClImage2dReadOnly:
+ return ckw::TensorStorageType::Texture2dReadOnly;
+ break;
+ case TensorStorageType::ClImage2dWriteOnly:
+ return ckw::TensorStorageType::Texture2dWriteOnly;
+ break;
+ case TensorStorageType::Unknown:
+ return ckw::TensorStorageType::Unknown;
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Unknown tensor storage type");
+ return ckw::TensorStorageType::Unknown;
+ }
+}
+inline TensorStorageType from_ckw(const ckw::TensorStorageType &storage)
+{
+ switch(storage)
+ {
+ case ckw::TensorStorageType::BufferUint8Ptr:
+ return TensorStorageType::ClBufferUint8Ptr;
+ break;
+ case ckw::TensorStorageType::Texture2dReadOnly:
+ return TensorStorageType::ClImage2dReadOnly;
+ break;
+ case ckw::TensorStorageType::Texture2dWriteOnly:
+ return TensorStorageType::ClImage2dWriteOnly;
+ break;
+ case ckw::TensorStorageType::Unknown:
+ return TensorStorageType::Unknown;
+ break;
+ default:
+ ARM_COMPUTE_ERROR("Unknown CKW tensor storage type");
+ return TensorStorageType::Unknown;
+ }
+}
} // namespace dynamic_fusion
} // namespace experimental
} // namespace arm_compute