Port DirectConv2d to CKW backend

Ports the direct convolution 2D kernel from the experimental Dynamic Fusion interface to use the new Compute Kernel Writer (CKW) backend for OpenCL code generation. Only the FP16 and FP32 data types are supported.

Resolves: COMPMID-6259
Change-Id: Ia8d7b9cb789737b22b1d877cd798a73eda0ce4ab
Signed-off-by: Jakub Sujak <jakub.sujak@arm.com>
Reviewed-on: https://review.mlplatform.org/c/ml/ComputeLibrary/+/10059
Reviewed-by: Gunes Bayir <gunes.bayir@arm.com>
Reviewed-by: Gian Marco Iodice <gianmarco.iodice@arm.com>
Comments-Addressed: Arm Jenkins <bsgcomp@arm.com>
Tested-by: Arm Jenkins <bsgcomp@arm.com>
Benchmark: Arm Jenkins <bsgcomp@arm.com>
author: Jakub Sujak <jakub.sujak@arm.com> 2023-07-31 13:36:58 +0100
committer: Jakub Sujak <jakub.sujak@arm.com> 2023-08-07 08:44:17 +0000
commit: e1c96e7e6dbf5314676fc81831e2ccb34a031ea1 (patch)
tree: f69ee0ac5dd6b29de0041293b279804bcecf0df7 /compute_kernel_writer/prototype/src
parent: 78ce2730ecd2f1e666cdd10263bf054c0b740a9c (diff)
download: ComputeLibrary-e1c96e7e6dbf5314676fc81831e2ccb34a031ea1.tar.gz
3 files changed, 109 insertions, 24 deletions
diff --git a/compute_kernel_writer/prototype/src/KernelWriter.cpp b/compute_kernel_writer/prototype/src/KernelWriter.cpp
index 9122e518b4..f29cf12802 100644
--- a/compute_kernel_writer/prototype/src/KernelWriter.cpp
+++ b/compute_kernel_writer/prototype/src/KernelWriter.cpp
@@ -128,6 +128,10 @@ TileOperand &KernelWriter::declare_tile_operand(std::unique_ptr<TileOperand> ope
             name,
             prototype::TileInfo(info.data_type(), info.width(), info.height()));
     }
+    else
+    {
+        _impl->declare_const_tile(name, operand.value(), operand.data_type());
+    }
 
     return operand;
 }
@@ -136,7 +140,7 @@ TileOperand &KernelWriter::declare_tile_operand(std::unique_ptr<TileOperand> ope
 // Load and store
 // =================================================================================================
 
-void KernelWriter::op_load(TileOperand &tile, TensorOperand &tensor, const TensorTileSampler &sampler)
+void KernelWriter::op_load(TileOperand &tile, const TensorOperand &tensor, const TensorTileSampler &sampler, const TileOperand &dilation_y)
 {
     prototype::TensorOperand impl_tensor(
         tensor.name(),
@@ -152,9 +156,59 @@ void KernelWriter::op_load(TileOperand &tile, TensorOperand &tensor, const Tenso
     auto impl_z = sampler.z().create_impl_operand(_impl.get());
     auto impl_b = sampler.b().create_impl_operand(_impl.get());
 
+    auto impl_dilation_y = dilation_y.create_impl_operand(_impl.get());
+
+    auto impl_dst = tile.create_impl_operand(_impl.get());
+
+    _impl->op_load_immediate(impl_tensor, impl_dst, impl_x, impl_y, impl_z, impl_b, impl_dilation_y);
+}
+
+void KernelWriter::op_load_indirect(TileOperand &tile, const TensorOperand &tensor, const TensorTileSampler &sampler)
+{
+    prototype::TensorOperand impl_tensor(
+        tensor.name(),
+        prototype::GpuSampler{
+            sampler.format(),
+            prototype::to_gpu_tensor_storage(tensor.storage_type()),
+            sampler.address_mode_x(),
+            sampler.address_mode_y(),
+            sampler.address_mode_z() });
+
+    auto impl_x = sampler.x().create_impl_operand(_impl.get());
+    auto impl_y = sampler.y().create_impl_operand(_impl.get());
+    auto impl_z = sampler.z().create_impl_operand(_impl.get());
+    auto impl_b = sampler.b().create_impl_operand(_impl.get());
+
+    auto impl_dst = tile.create_impl_operand(_impl.get());
+
+    _impl->op_load_indirect(impl_tensor, impl_dst, impl_x, impl_y, impl_z, impl_b);
+}
+
+void KernelWriter::util_get_indirect_buffer(TileOperand             &tile,
+                                            const TensorOperand     &tensor,
+                                            const TensorTileSampler &sampler,
+                                            const TileOperand       &x,
+                                            const TileOperand       &y,
+                                            const TileOperand       &x_off,
+                                            const TileOperand       &y_off)
+{
+    prototype::TensorOperand impl_tensor(
+        tensor.name(),
+        prototype::GpuSampler{
+            sampler.format(),
+            prototype::to_gpu_tensor_storage(tensor.storage_type()),
+            sampler.address_mode_x(),
+            sampler.address_mode_y(),
+            sampler.address_mode_z() });
+
+    auto impl_x     = x.create_impl_operand(_impl.get());
+    auto impl_y     = y.create_impl_operand(_impl.get());
+    auto impl_x_off = x_off.create_impl_operand(_impl.get());
+    auto impl_y_off = y_off.create_impl_operand(_impl.get());
+
     auto impl_dst = tile.create_impl_operand(_impl.get());
 
-    _impl->op_load_immediate(impl_tensor, impl_dst, impl_x, impl_y, impl_z, impl_b);
+    _impl->util_get_indirect_buffer(impl_dst, impl_tensor, impl_x, impl_y, impl_x_off, impl_y_off);
 }
 
 void KernelWriter::op_store(TensorOperand &tensor, const TileOperand &tile, const TensorTileSampler &sampler)
diff --git a/compute_kernel_writer/prototype/src/Prototype.h b/compute_kernel_writer/prototype/src/Prototype.h
index a8dc7fbfdb..2b519471ac 100644
--- a/compute_kernel_writer/prototype/src/Prototype.h
+++ b/compute_kernel_writer/prototype/src/Prototype.h
@@ -3009,7 +3009,7 @@ private:
             address += " + (";
             address += x + ") * sizeof(" + dst_type + ")";
         }
-        if(y != "0" && (_mapper.is_one_component_y() != true))
+        if(y != "0")
         {
             const std::string stride_y = _mapper.tensor_component_stride_y();
             address += " + (";
@@ -3249,7 +3249,7 @@ private:
         std::string coord_x = "(" + x + ") >> 2";
         std::string coord_y = "(";
 
-        if(y != "0" && (_mapper.is_one_component_y() != true))
+        if(y != "0")
         {
             coord_y += y;
         }
@@ -4024,13 +4024,6 @@ public:
             _data->code += ", ";
             _data->code += x_s->scalar(0, i).str;
             _data->code += " >= 0);\n";
-            // mi_0 = select(wxh, mi_0, y_s >= 0);
-            _data->code += dst->scalar(0, i).str;
-            _data->code += " = select(-1, ";
-            _data->code += dst->scalar(0, i).str;
-            _data->code += ", ";
-            _data->code += y_s->scalar(0, i).str;
-            _data->code += " >= 0);\n";
             // mi_0 = select(wxh, mi_0, x_s < width);
             _data->code += dst->scalar(0, i).str;
             _data->code += " = select(-1, ";
@@ -4039,6 +4032,13 @@ public:
             _data->code += x_s->scalar(0, i).str;
             _data->code += " < ";
             _data->code += width + ");\n";
+            // mi_0 = select(wxh, mi_0, y_s >= 0);
+            _data->code += dst->scalar(0, i).str;
+            _data->code += " = select(-1, ";
+            _data->code += dst->scalar(0, i).str;
+            _data->code += ", ";
+            _data->code += y_s->scalar(0, i).str;
+            _data->code += " >= 0);\n";
             // mi_0 = select(wxh, mi_0, y_s < height);
             _data->code += dst->scalar(0, i).str;
             _data->code += " = select(-1, ";
diff --git a/compute_kernel_writer/prototype/src/TileOperand.cpp b/compute_kernel_writer/prototype/src/TileOperand.cpp
index fcb3cb6415..bf6a15b9df 100644
--- a/compute_kernel_writer/prototype/src/TileOperand.cpp
+++ b/compute_kernel_writer/prototype/src/TileOperand.cpp
@@ -30,22 +30,42 @@ namespace ckw
 {
 
 TileOperand::TileOperand(const std::string &name, const TileInfo &info)
-    : OperandBase(name), _info(info), _value{ 0 }, _constant(false)
+    : OperandBase(name),
+      _info(info),
+      _value{ std::vector<std::string>{ "0" } },
+      _constant(false)
 {
 }
 
 TileOperand::TileOperand(const std::string &name, DataType data_type)
-    : OperandBase(name), _info(TileInfo{ data_type }), _value(0), _constant(false)
+    : OperandBase(name),
+      _info(TileInfo{ data_type }),
+      _value{ std::vector<std::string>{ "0" } },
+      _constant(false)
 {
 }
 
 TileOperand::TileOperand(const std::string &name, int32_t value)
-    : OperandBase(name), _info(TileInfo{ DataType::Int32 }), _value(value), _constant(true)
+    : OperandBase(name),
+      _info(TileInfo{ DataType::Int32 }),
+      _value{ std::vector<std::string>{ std::to_string(value) } },
+      _constant(true)
 {
 }
 
 TileOperand::TileOperand(const std::string &name, float value)
-    : OperandBase(name), _info(TileInfo{ DataType::Fp32 }), _value(value), _constant(true)
+    : OperandBase(name),
+      _info(TileInfo{ DataType::Fp32 }),
+      _value{ std::vector<std::string>{ std::to_string(value) } },
+      _constant(true)
+{
+}
+
+TileOperand::TileOperand(const std::string &name, const TileContainer &vals, DataType dt)
+    : OperandBase(name),
+      _info(TileInfo{ dt, static_cast<int32_t>(vals.size()), static_cast<int32_t>(vals[0].size()) }),
+      _value(vals),
+      _constant(true)
 {
 }
 
@@ -55,17 +75,23 @@ prototype::Operand TileOperand::create_impl_operand(prototype::IGpuKernelWriter
 
     if(_constant)
     {
-        switch(_info.data_type())
+        if(is_scalar())
         {
-            case DataType::Int32:
-                return prototype::Operand(std::to_string(_value.get<int32_t>()),
-                                          prototype::OperandType::ScalarInt32);
+            switch(_info.data_type())
+            {
+                case DataType::Int32:
+                    return prototype::Operand(_value[0][0], prototype::OperandType::ScalarInt32);
 
-            case DataType::Fp32:
-                return prototype::Operand(std::to_string(_value.get<float>()), prototype::OperandType::ScalarFp32);
+                case DataType::Fp32:
+                    return prototype::Operand(_value[0][0], prototype::OperandType::ScalarFp32);
 
-            default:
-                CKW_ASSERT(false);
+                default:
+                    CKW_ASSERT(false);
+            }
+        }
+        else
+        {
+            return prototype::Operand(name());
         }
     }
     else
@@ -94,11 +120,16 @@ bool TileOperand::is_scalar() const
     return _info.width() == 1 && _info.height() == 1;
 }
 
-ScalarValue TileOperand::scalar_value() const
+std::string TileOperand::scalar_value() const
 {
     CKW_ASSERT(is_scalar());
     CKW_ASSERT(is_constant());
 
+    return _value[0][0];
+}
+
+const TileContainer &TileOperand::value() const
+{
     return _value;
 }
author	Jakub Sujak <jakub.sujak@arm.com>	2023-07-31 13:36:58 +0100
committer	Jakub Sujak <jakub.sujak@arm.com>	2023-08-07 08:44:17 +0000
commit	e1c96e7e6dbf5314676fc81831e2ccb34a031ea1 (patch)
tree	f69ee0ac5dd6b29de0041293b279804bcecf0df7 /compute_kernel_writer/prototype/src
parent	78ce2730ecd2f1e666cdd10263bf054c0b740a9c (diff)
download	ComputeLibrary-e1c96e7e6dbf5314676fc81831e2ccb34a031ea1.tar.gz