diff options
Diffstat (limited to 'src/dynamic_fusion/sketch/gpu/ckw_driver/components')
5 files changed, 148 insertions, 40 deletions
diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp index 224c176a31..c07fac0e0d 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwActivation.cpp @@ -23,14 +23,15 @@ */ #include "GpuCkwActivation.h" -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h" -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/helpers/AdjustVecSize.h" #include "ckw/TensorTileSampler.h" #include "src/core/helpers/WindowHelpers.h" +#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" #include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h" #include <string> @@ -84,8 +85,8 @@ inline TensorTileSampler create_sampler(GpuCkwScopedKernelWriter &writer, int32_ } // namespace GpuCkwActivation::GpuCkwActivation(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes) + const ArgumentPack<ITensorInfo> &tensors, + const Attributes &attributes) : IGpuCkwComponentDriver{ id, tensors }, _src{}, _dst{}, @@ -102,8 +103,8 @@ void GpuCkwActivation::write_component_code(const ComponentGroup &comp_group, Gp const unsigned int n0 = root_window.x().step(); const unsigned int m0 = root_window.y().step(); - GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, "src"); - GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst"); + GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src"); + GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst"); load_src_dst_tiles_and_prepare_sampler(writer, src, dst, m0, n0, create_sampler); @@ -111,14 +112,14 @@ void GpuCkwActivation::write_component_code(const ComponentGroup &comp_group, Gp auto &dst_tile = dst->tile(); // Constants - const auto &constant_minus_1 = writer->declare_tile("minus_1", -1); - const auto &constant_pos_1 = writer->declare_tile("one", 1); - const auto &constant_zero = writer->declare_tile("zero", 0); - const auto &constant_A = writer->declare_tile("A_VAL", _attributes.a()); - const auto &constant_B = writer->declare_tile("B_VAL", _attributes.b()); + const auto &constant_minus_1 = writer->declare_tile("minus_1", -1); + const auto &constant_pos_1 = writer->declare_tile("one", 1); + const auto &constant_zero = writer->declare_tile("zero", 0); + const auto &constant_A = writer->declare_tile("A_VAL", _attributes.a()); + const auto &constant_B = writer->declare_tile("B_VAL", _attributes.b()); // Perform the operation. - switch (_attributes.activation()) + switch(_attributes.activation()) { case ActivationLayerInfo::ActivationFunction::LOGISTIC: { @@ -178,9 +179,9 @@ Window GpuCkwActivation::get_window() const // Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged // This is in line with the collapsing convention used by operators like Conv2d output_shape.collapse(2U, 1U); - constexpr unsigned int vector_size_byte_opencl = 16; - const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0)); - Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration)); + constexpr unsigned int vector_size_byte_opencl = 16; + const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0)); + Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration)); return win; } diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp index dd71c55df2..8d7e6a8c37 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwCast.cpp @@ -23,14 +23,15 @@ */ #include "GpuCkwCast.h" -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h" -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Validate.h" #include "arm_compute/core/utils/helpers/AdjustVecSize.h" #include "ckw/TensorTileSampler.h" #include "src/core/helpers/WindowHelpers.h" +#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" #include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h" #include <string> @@ -84,8 +85,8 @@ inline TensorTileSampler create_sampler(GpuCkwScopedKernelWriter &writer, int32_ } // namespace GpuCkwCast::GpuCkwCast(ComponentId id, - const ArgumentPack<ITensorInfo> &tensors, - const Attributes &attributes) + const ArgumentPack<ITensorInfo> &tensors, + const Attributes &attributes) : IGpuCkwComponentDriver{ id, tensors }, _src{}, _dst{}, @@ -102,8 +103,8 @@ void GpuCkwCast::write_component_code(const ComponentGroup &comp_group, GpuCkwVa const unsigned int n0 = root_window.x().step(); const unsigned int m0 = root_window.y().step(); - GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, "src"); - GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst"); + GpuCkwComponentArgument *src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src"); + GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst"); // Load the source tile and prepare the sampler. if(!src->has_tile()) @@ -124,7 +125,7 @@ void GpuCkwCast::write_component_code(const ComponentGroup &comp_group, GpuCkwVa if(!dst->has_tile()) { // Get Target datatype and convert it to ckw::DataType. - ckw::DataType target_dt = dynamic_fusion::to_ckw(_attributes.data_type()); + ckw::DataType target_dt = dynamic_fusion::to_ckw(_attributes.data_type()); // Create dst_tile based on src_tile dimensions and with target DataType. const TileInfo src_tile_info = src_tile.tile_info(); @@ -166,9 +167,9 @@ Window GpuCkwCast::get_window() const // Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged // This is in line with the collapsing convention used by operators like Conv2d output_shape.collapse(2U, 1U); - constexpr unsigned int vector_size_byte_opencl = 16; - const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0)); - Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration)); + constexpr unsigned int vector_size_byte_opencl = 16; + const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0)); + Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration)); return win; } diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp index 685bf391dc..15e32e26d5 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwElementwiseBinary.cpp @@ -23,14 +23,16 @@ */ #include "GpuCkwElementwiseBinary.h" -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h" -#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" #include "arm_compute/core/Error.h" #include "arm_compute/core/Validate.h" +#include "arm_compute/core/utils/helpers/AdjustVecSize.h" #include "ckw/TensorTileSampler.h" #include "ckw/types/TensorSamplerTypes.h" #include "src/core/helpers/WindowHelpers.h" +#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" #include "src/dynamic_fusion/sketch/gpu/GpuKernelComponentGroup.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h" +#include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/WriterHelper.h" #include <string> @@ -54,14 +56,20 @@ inline TensorTileSampler create_simple_sampler(GpuCkwScopedKernelWriter &writer, auto &gid_1 = writer->declare_tile("gid_1", ckw::DataType::Int32); auto &gid_2 = writer->declare_tile("gid_2", ckw::DataType::Int32); - auto &const_0 = writer->declare_tile("0", 0); - writer->op_get_global_id(gid_0, 0); writer->op_get_global_id(gid_1, 1); writer->op_get_global_id(gid_2, 2); - sampler.x(gid_0); - sampler.y(gid_1); + auto &x_coord = writer->declare_tile("x_coord", ckw::DataType::Int32); + auto &y_coord = writer->declare_tile("y_coord", ckw::DataType::Int32); + auto &m0_t = writer->declare_tile("m0", m0); + auto &n0_t = writer->declare_tile("n0", n0); + writer->op_binary_expression(x_coord, gid_0, ckw::BinaryOp::Mul, n0_t); + writer->op_binary_expression(y_coord, gid_1, ckw::BinaryOp::Mul, m0_t); + + sampler.x(x_coord); + sampler.y(y_coord); + auto &const_0 = writer->declare_tile("0", 0); sampler.z(const_0); // 3rd dimension collapsed with 2nd dimension sampler.b(gid_2); @@ -99,9 +107,9 @@ void GpuCkwElementwiseBinary::write_component_code(const ComponentGroup &comp_gr const unsigned int n0 = root_window.x().step(); const unsigned int m0 = root_window.y().step(); - GpuCkwComponentArgument *lhs = vtable.declare_variable(comp_group, writer, _lhs, "lhs"); - GpuCkwComponentArgument *rhs = vtable.declare_variable(comp_group, writer, _rhs, "rhs"); - GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, "dst"); + GpuCkwComponentArgument *lhs = vtable.declare_variable(comp_group, writer, _lhs, TensorStorageType::ClBufferUint8Ptr, "lhs"); + GpuCkwComponentArgument *rhs = vtable.declare_variable(comp_group, writer, _rhs, TensorStorageType::ClBufferUint8Ptr, "rhs"); + GpuCkwComponentArgument *dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst"); // Load the LHS and RHS tiles and prepare the tensor sampler. load_lhs_rhs_tiles_and_prepare_sampler(writer, lhs, rhs, m0, n0, create_simple_sampler); @@ -131,10 +139,9 @@ Window GpuCkwElementwiseBinary::get_window() const // Collapse Dim 1 (W) and Dim 2 (H) together, leave Dim 0 (C) unchanged // This is in line with the collapsing convention used by operators like Conv2d output_shape.collapse(2U, 1U); - // constexpr unsigned int vector_size_byte_opencl = 16; - // const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0)); - const unsigned int num_elems_processed_per_iteration = 1U; // Hard-coded for now - Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration)); + constexpr unsigned int vector_size_byte_opencl = 16; + const unsigned int num_elems_processed_per_iteration = adjust_vec_size(vector_size_byte_opencl / _dst->element_size(), _dst->dimension(0)); + Window win = calculate_max_window(output_shape, Steps(num_elems_processed_per_iteration)); return win; } diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp index 63555e6064..247d1b834f 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/GpuCkwStore.cpp @@ -24,6 +24,7 @@ #include "GpuCkwStore.h" #include "arm_compute/core/Error.h" +#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwKernelWriter.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwScopedKernelWriter.h" #include "src/dynamic_fusion/sketch/gpu/ckw_driver/GpuCkwVariableTable.h" @@ -43,8 +44,8 @@ GpuCkwStore::GpuCkwStore(ComponentId id, const ArgumentPack<ITensorInfo> &tensor } void GpuCkwStore::write_component_code(const ComponentGroup &comp_group, GpuCkwVariableTable &vtable, GpuCkwScopedKernelWriter writer) const { - auto src = vtable.declare_variable(comp_group, writer, _src, "src"); - auto dst = vtable.declare_variable(comp_group, writer, _dst, "dst"); + auto src = vtable.declare_variable(comp_group, writer, _src, TensorStorageType::ClBufferUint8Ptr, "src"); + auto dst = vtable.declare_variable(comp_group, writer, _dst, TensorStorageType::ClBufferUint8Ptr, "dst"); auto &src_tile = src->tile(); const auto &sampler = src->tile_sampler(); diff --git a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h index 9027bddd76..8a38d67d80 100644 --- a/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h +++ b/src/dynamic_fusion/sketch/gpu/ckw_driver/components/utils/TypeConverter.h @@ -28,6 +28,7 @@ #include "arm_compute/core/TensorShape.h" #include "arm_compute/core/Types.h" #include "ckw/TensorInfo.h" +#include "src/dynamic_fusion/sketch/gpu/GpuKernelArgument.h" namespace arm_compute { @@ -98,6 +99,103 @@ inline ckw::TensorInfo to_ckw(const ITensorInfo &tensor_info) tensor_info.id() }; } + +inline TensorComponentType from_ckw(const ckw::TensorComponentType &component) +{ + switch(component) + { + case ckw::TensorComponentType::OffsetFirstElement: + return TensorComponentType::OffsetFirstElement; + break; + case ckw::TensorComponentType::Stride0: + return TensorComponentType::Stride0; + break; + case ckw::TensorComponentType::Stride1: + return TensorComponentType::Stride1; + break; + case ckw::TensorComponentType::Stride2: + return TensorComponentType::Stride2; + break; + case ckw::TensorComponentType::Stride3: + return TensorComponentType::Stride3; + break; + case ckw::TensorComponentType::Stride4: + return TensorComponentType::Stride4; + break; + case ckw::TensorComponentType::Dim0: + return TensorComponentType::Dim0; + break; + case ckw::TensorComponentType::Dim1: + return TensorComponentType::Dim1; + break; + case ckw::TensorComponentType::Dim2: + return TensorComponentType::Dim2; + break; + case ckw::TensorComponentType::Dim3: + return TensorComponentType::Dim3; + break; + case ckw::TensorComponentType::Dim4: + return TensorComponentType::Dim4; + break; + case ckw::TensorComponentType::Dim1xDim2: + return TensorComponentType::Dim1xDim2; + break; + case ckw::TensorComponentType::Dim2xDim3: + return TensorComponentType::Dim2xDim3; + break; + case ckw::TensorComponentType::Dim1xDim2xDim3: + return TensorComponentType::Dim1xDim2xDim3; + break; + case ckw::TensorComponentType::Unknown: + return TensorComponentType::Unknown; + default: + ARM_COMPUTE_ERROR("Unknown CKW tensor component"); + return TensorComponentType::Unknown; + } +} + +inline ckw::TensorStorageType to_ckw(const TensorStorageType &storage) +{ + switch(storage) + { + case TensorStorageType::ClBufferUint8Ptr: + return ckw::TensorStorageType::BufferUint8Ptr; + break; + case TensorStorageType::ClImage2dReadOnly: + return ckw::TensorStorageType::Texture2dReadOnly; + break; + case TensorStorageType::ClImage2dWriteOnly: + return ckw::TensorStorageType::Texture2dWriteOnly; + break; + case TensorStorageType::Unknown: + return ckw::TensorStorageType::Unknown; + break; + default: + ARM_COMPUTE_ERROR("Unknown tensor storage type"); + return ckw::TensorStorageType::Unknown; + } +} +inline TensorStorageType from_ckw(const ckw::TensorStorageType &storage) +{ + switch(storage) + { + case ckw::TensorStorageType::BufferUint8Ptr: + return TensorStorageType::ClBufferUint8Ptr; + break; + case ckw::TensorStorageType::Texture2dReadOnly: + return TensorStorageType::ClImage2dReadOnly; + break; + case ckw::TensorStorageType::Texture2dWriteOnly: + return TensorStorageType::ClImage2dWriteOnly; + break; + case ckw::TensorStorageType::Unknown: + return TensorStorageType::Unknown; + break; + default: + ARM_COMPUTE_ERROR("Unknown CKW tensor storage type"); + return TensorStorageType::Unknown; + } +} } // namespace dynamic_fusion } // namespace experimental } // namespace arm_compute |