diff options
author | Jakub Sujak <jakub.sujak@arm.com> | 2023-07-31 13:36:58 +0100 |
---|---|---|
committer | Jakub Sujak <jakub.sujak@arm.com> | 2023-08-07 08:44:17 +0000 |
commit | e1c96e7e6dbf5314676fc81831e2ccb34a031ea1 (patch) | |
tree | f69ee0ac5dd6b29de0041293b279804bcecf0df7 /compute_kernel_writer/prototype/src/KernelWriter.cpp | |
parent | 78ce2730ecd2f1e666cdd10263bf054c0b740a9c (diff) | |
download | ComputeLibrary-e1c96e7e6dbf5314676fc81831e2ccb34a031ea1.tar.gz |
Port DirectConv2d to CKW backend
Ports the direct convolution 2D kernel from the experimental Dynamic Fusion interface to use the new Compute Kernel Writer backend for OpenCL code generation.
Only the FP16 and FP32 data types are supported.
Resolves: COMPMID-6259
Change-Id: Ia8d7b9cb789737b22b1d877cd798a73eda0ce4ab
Signed-off-by: Jakub Sujak <jakub.sujak@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10059
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
Diffstat (limited to 'compute_kernel_writer/prototype/src/KernelWriter.cpp')
-rw-r--r-- | compute_kernel_writer/prototype/src/KernelWriter.cpp | 58 |
1 files changed, 56 insertions, 2 deletions
diff --git a/compute_kernel_writer/prototype/src/KernelWriter.cpp b/compute_kernel_writer/prototype/src/KernelWriter.cpp index 9122e518b4..f29cf12802 100644 --- a/compute_kernel_writer/prototype/src/KernelWriter.cpp +++ b/compute_kernel_writer/prototype/src/KernelWriter.cpp @@ -128,6 +128,10 @@ TileOperand &KernelWriter::declare_tile_operand(std::unique_ptr<TileOperand> ope name, prototype::TileInfo(info.data_type(), info.width(), info.height())); } + else + { + _impl->declare_const_tile(name, operand.value(), operand.data_type()); + } return operand; } @@ -136,7 +140,7 @@ TileOperand &KernelWriter::declare_tile_operand(std::unique_ptr<TileOperand> ope // Load and store // ================================================================================================= -void KernelWriter::op_load(TileOperand &tile, TensorOperand &tensor, const TensorTileSampler &sampler) +void KernelWriter::op_load(TileOperand &tile, const TensorOperand &tensor, const TensorTileSampler &sampler, const TileOperand &dilation_y) { prototype::TensorOperand impl_tensor( tensor.name(), @@ -152,9 +156,59 @@ void KernelWriter::op_load(TileOperand &tile, TensorOperand &tensor, const Tenso auto impl_z = sampler.z().create_impl_operand(_impl.get()); auto impl_b = sampler.b().create_impl_operand(_impl.get()); + auto impl_dilation_y = dilation_y.create_impl_operand(_impl.get()); + + auto impl_dst = tile.create_impl_operand(_impl.get()); + + _impl->op_load_immediate(impl_tensor, impl_dst, impl_x, impl_y, impl_z, impl_b, impl_dilation_y); +} + +void KernelWriter::op_load_indirect(TileOperand &tile, const TensorOperand &tensor, const TensorTileSampler &sampler) +{ + prototype::TensorOperand impl_tensor( + tensor.name(), + prototype::GpuSampler{ + sampler.format(), + prototype::to_gpu_tensor_storage(tensor.storage_type()), + sampler.address_mode_x(), + sampler.address_mode_y(), + sampler.address_mode_z() }); + + auto impl_x = sampler.x().create_impl_operand(_impl.get()); + auto impl_y = 
sampler.y().create_impl_operand(_impl.get()); + auto impl_z = sampler.z().create_impl_operand(_impl.get()); + auto impl_b = sampler.b().create_impl_operand(_impl.get()); + + auto impl_dst = tile.create_impl_operand(_impl.get()); + + _impl->op_load_indirect(impl_tensor, impl_dst, impl_x, impl_y, impl_z, impl_b); +} + +void KernelWriter::util_get_indirect_buffer(TileOperand &tile, + const TensorOperand &tensor, + const TensorTileSampler &sampler, + const TileOperand &x, + const TileOperand &y, + const TileOperand &x_off, + const TileOperand &y_off) +{ + prototype::TensorOperand impl_tensor( + tensor.name(), + prototype::GpuSampler{ + sampler.format(), + prototype::to_gpu_tensor_storage(tensor.storage_type()), + sampler.address_mode_x(), + sampler.address_mode_y(), + sampler.address_mode_z() }); + + auto impl_x = x.create_impl_operand(_impl.get()); + auto impl_y = y.create_impl_operand(_impl.get()); + auto impl_x_off = x_off.create_impl_operand(_impl.get()); + auto impl_y_off = y_off.create_impl_operand(_impl.get()); + auto impl_dst = tile.create_impl_operand(_impl.get()); - _impl->op_load_immediate(impl_tensor, impl_dst, impl_x, impl_y, impl_z, impl_b); + _impl->util_get_indirect_buffer(impl_dst, impl_tensor, impl_x, impl_y, impl_x_off, impl_y_off); } void KernelWriter::op_store(TensorOperand &tensor, const TileOperand &tile, const TensorTileSampler &sampler) |